path: root/qa/tasks/cephfs
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree       64f88b554b444a49f656b6c656111a145cbbaa28 /qa/tasks/cephfs
parent     Initial commit. (diff)
Adding upstream version 18.2.2. (upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--  qa/tasks/cephfs/__init__.py | 0
-rw-r--r--  qa/tasks/cephfs/caps_helper.py | 195
-rw-r--r--  qa/tasks/cephfs/cephfs_test_case.py | 442
-rw-r--r--  qa/tasks/cephfs/filesystem.py | 1712
-rw-r--r--  qa/tasks/cephfs/fuse_mount.py | 533
-rw-r--r--  qa/tasks/cephfs/kernel_mount.py | 394
-rw-r--r--  qa/tasks/cephfs/mount.py | 1570
-rw-r--r--  qa/tasks/cephfs/test_acls.py | 39
-rw-r--r--  qa/tasks/cephfs/test_admin.py | 1494
-rw-r--r--  qa/tasks/cephfs/test_auto_repair.py | 88
-rw-r--r--  qa/tasks/cephfs/test_backtrace.py | 102
-rw-r--r--  qa/tasks/cephfs/test_cap_flush.py | 58
-rw-r--r--  qa/tasks/cephfs/test_cephfs_shell.py | 1167
-rw-r--r--  qa/tasks/cephfs/test_client_limits.py | 397
-rw-r--r--  qa/tasks/cephfs/test_client_recovery.py | 757
-rw-r--r--  qa/tasks/cephfs/test_damage.py | 663
-rw-r--r--  qa/tasks/cephfs/test_data_scan.py | 796
-rw-r--r--  qa/tasks/cephfs/test_dump_tree.py | 66
-rw-r--r--  qa/tasks/cephfs/test_exports.py | 582
-rw-r--r--  qa/tasks/cephfs/test_failover.py | 819
-rw-r--r--  qa/tasks/cephfs/test_flush.py | 112
-rw-r--r--  qa/tasks/cephfs/test_forward_scrub.py | 307
-rw-r--r--  qa/tasks/cephfs/test_fragment.py | 359
-rw-r--r--  qa/tasks/cephfs/test_fscrypt.py | 77
-rw-r--r--  qa/tasks/cephfs/test_fstop.py | 114
-rw-r--r--  qa/tasks/cephfs/test_full.py | 398
-rw-r--r--  qa/tasks/cephfs/test_journal_migration.py | 100
-rw-r--r--  qa/tasks/cephfs/test_journal_repair.py | 405
-rw-r--r--  qa/tasks/cephfs/test_mantle.py | 111
-rw-r--r--  qa/tasks/cephfs/test_mds_metrics.py | 643
-rw-r--r--  qa/tasks/cephfs/test_meta_injection.py | 38
-rw-r--r--  qa/tasks/cephfs/test_mirroring.py | 1298
-rw-r--r--  qa/tasks/cephfs/test_misc.py | 640
-rw-r--r--  qa/tasks/cephfs/test_multifs_auth.py | 297
-rw-r--r--  qa/tasks/cephfs/test_multimds_misc.py | 223
-rw-r--r--  qa/tasks/cephfs/test_newops.py | 18
-rw-r--r--  qa/tasks/cephfs/test_nfs.py | 880
-rw-r--r--  qa/tasks/cephfs/test_openfiletable.py | 85
-rw-r--r--  qa/tasks/cephfs/test_pool_perm.py | 109
-rw-r--r--  qa/tasks/cephfs/test_quota.py | 106
-rw-r--r--  qa/tasks/cephfs/test_readahead.py | 26
-rw-r--r--  qa/tasks/cephfs/test_recovery_fs.py | 38
-rw-r--r--  qa/tasks/cephfs/test_recovery_pool.py | 179
-rw-r--r--  qa/tasks/cephfs/test_scrub.py | 187
-rw-r--r--  qa/tasks/cephfs/test_scrub_checks.py | 462
-rw-r--r--  qa/tasks/cephfs/test_sessionmap.py | 232
-rw-r--r--  qa/tasks/cephfs/test_snap_schedules.py | 607
-rw-r--r--  qa/tasks/cephfs/test_snapshots.py | 605
-rw-r--r--  qa/tasks/cephfs/test_strays.py | 1027
-rw-r--r--  qa/tasks/cephfs/test_subvolume.py | 170
-rw-r--r--  qa/tasks/cephfs/test_volumes.py | 7946
-rw-r--r--  qa/tasks/cephfs/xfstests_dev.py | 303
-rw-r--r--  qa/tasks/cephfs_mirror.py | 73
-rw-r--r--  qa/tasks/cephfs_mirror_thrash.py | 219
-rw-r--r--  qa/tasks/cephfs_test_runner.py | 213
-rw-r--r--  qa/tasks/cephfs_upgrade_snap.py | 47
56 files changed, 30528 insertions, 0 deletions
diff --git a/qa/tasks/cephfs/__init__.py b/qa/tasks/cephfs/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/qa/tasks/cephfs/__init__.py
diff --git a/qa/tasks/cephfs/caps_helper.py b/qa/tasks/cephfs/caps_helper.py
new file mode 100644
index 000000000..ac9bc4401
--- /dev/null
+++ b/qa/tasks/cephfs/caps_helper.py
@@ -0,0 +1,195 @@
+"""
+Helper methods to test that MON and MDS caps are enforced properly.
+"""
+from os.path import join as os_path_join
+from logging import getLogger
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+from teuthology.orchestra.run import Raw
+
+
+log = getLogger(__name__)
+
+
+class CapTester(CephFSTestCase):
+ """
+ Test that MON and MDS caps are enforced.
+
+ MDS caps are tested by exercising read-write permissions and MON caps are
+ tested using the output of the command "ceph fs ls". It also provides
+ write_test_files(), which creates test files at the given path on the
+ CephFS mounts passed to it.
+
+ USAGE: Call write_test_files() at the beginning of the test. Once the
+ caps to be tested have been assigned to the client and the CephFS has
+ been remounted for the caps to take effect, call run_cap_tests(),
+ run_mon_cap_tests() or run_mds_cap_tests() as needed.
+ """
+
+ def write_test_files(self, mounts, testpath=''):
+ """
+ Exercising 'r' and 'w' access levels on a file on a CephFS mount is
+ routine across all caps tests. Having a single method that writes
+ that file reduces clutter in these tests.
+
+ This method writes fixed data to a file with a fixed name located
+ at the path passed in testpath for the given list of mounts. If
+ testpath is empty, the file is created at the root of the CephFS.
+ """
+ dirname, filename = 'testdir', 'testfile'
+ self.test_set = []
+ # XXX: The reason behind testpath[1:] below is that the testpath is
+ # supposed to contain a path inside CephFS (which might be passed as
+ # an absolute path). os.path.join() deletes all previous path
+ # components when it encounters a path component starting with '/'.
+ # Deleting the first '/' from the string in testpath ensures that
+ # previous path components are not deleted by os.path.join().
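+ # For example, os_path_join('/mnt/a', '/dir/b') returns '/dir/b',
+ # whereas os_path_join('/mnt/a', 'dir/b') returns '/mnt/a/dir/b'.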
+ if testpath:
+ testpath = testpath[1:] if testpath[0] == '/' else testpath
+ # XXX: passing just '/' screws up os.path.join() ahead.
+ if testpath == '/':
+ testpath = ''
+
+ for mount_x in mounts:
+ log.info(f'creating test file on FS {mount_x.cephfs_name} '
+ f'mounted at {mount_x.mountpoint}...')
+ dirpath = os_path_join(mount_x.hostfs_mntpt, testpath, dirname)
+ mount_x.run_shell(f'mkdir {dirpath}')
+ filepath = os_path_join(dirpath, filename)
+ # XXX: the reason behind adding filepath, cephfs_name and both
+ # mntpts to the file data is to catch a test bug where we intend to
+ # mount cephfs1 but what actually ends up mounted is cephfs2. Since
+ # filepath and filedata are otherwise identical, the tests couldn't
+ # tell that they access the right file name but on the wrong CephFS.
+ filedata = (f'filepath = {filepath}\n'
+ f'cephfs_name = {mount_x.cephfs_name}\n'
+ f'cephfs_mntpt = {mount_x.cephfs_mntpt}\n'
+ f'hostfs_mntpt = {mount_x.hostfs_mntpt}')
+ mount_x.write_file(filepath, filedata)
+ self.test_set.append((mount_x, filepath, filedata))
+ log.info(f'test file created at {filepath} with data '
+ f'"{filedata}".')
+
+ def run_cap_tests(self, perm, mntpt=None):
+ # TODO
+ #self.run_mon_cap_tests()
+ self.run_mds_cap_tests(perm, mntpt=mntpt)
+
+ def _get_fsnames_from_moncap(self, moncap):
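+ """
+ Return a list of all "fsname=..." values found in a MON cap string.
+
+ For example (hypothetical cap string):
+ 'allow rw fsname=a, allow rw fsname=b' -> ['a', 'b']
+ """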
+ fsnames = []
+ while moncap.find('fsname=') != -1:
+ fsname_first_char = moncap.index('fsname=') + len('fsname=')
+
+ if ',' in moncap:
+ last = moncap.index(',')
+ fsname = moncap[fsname_first_char : last]
+ moncap = moncap.replace(moncap[0 : last+1], '')
+ else:
+ fsname = moncap[fsname_first_char : ]
+ moncap = moncap.replace(moncap[0 : ], '')
+
+ fsnames.append(fsname)
+
+ return fsnames
+
+ def run_mon_cap_tests(self, def_fs, client_id):
+ """
+ Check that MON cap is enforced for a client by searching for a Ceph
+ FS name in output of cmd "fs ls" executed with that client's caps.
+
+ def_fs stands for default FS on Ceph cluster.
+ """
+ get_cluster_cmd_op = def_fs.mon_manager.raw_cluster_cmd
+
+ keyring = get_cluster_cmd_op(args=f'auth get client.{client_id}')
+
+ moncap = None
+ for line in keyring.split('\n'):
+ if 'caps mon' in line:
+ moncap = line[line.find(' = "') + 4 : -1]
+ break
+ else:
+ raise RuntimeError('run_mon_cap_tests(): mon cap not found in '
+ 'keyring. keyring -\n' + keyring)
+
+ keyring_path = def_fs.admin_remote.mktemp(data=keyring)
+
+ fsls = get_cluster_cmd_op(
+ args=f'fs ls --id {client_id} -k {keyring_path}')
+ log.info(f'output of fs ls cmd run by client.{client_id} -\n{fsls}')
+
+ if 'fsname=' not in moncap:
+ log.info('no FS name is mentioned in moncap, client has '
+ f'permission to list all file systems. moncap -\n{moncap}')
+ log.info('testing for presence of all FS names in output of '
+ '"fs ls" command run by client.')
+
+ fsls_admin = get_cluster_cmd_op(args='fs ls')
+ log.info(f'output of fs ls cmd run by admin -\n{fsls_admin}')
+
+ self.assertEqual(fsls, fsls_admin)
+ return
+
+ log.info(f'FS names are mentioned in moncap. moncap -\n{moncap}')
+ log.info('testing for presence of these FS names in output of '
+ '"fs ls" command run by client.')
+ for fsname in self._get_fsnames_from_moncap(moncap):
+ self.assertIn('name: ' + fsname, fsls)
+
+ def run_mds_cap_tests(self, perm, mntpt=None):
+ """
+ Run test for read perm and, for write perm, run positive test if it
+ is present and run negative test if not.
+ """
+ # XXX: mntpt is the path inside the CephFS that serves as the root
+ # for the current mount. Therefore, this path must be deleted from
+ # the file paths stored in self.test_set. Example -
+ # original path: /mnt/cephfs_x/dir1/dir2/testdir
+ # cephfs dir serving as root for current mnt: /dir1/dir2
+ # therefore, final path: /mnt/cephfs_x//testdir
+ if mntpt:
+ self.test_set = [(x, y.replace(mntpt, ''), z) for x, y, z in \
+ self.test_set]
+
+ self.conduct_pos_test_for_read_caps()
+
+ if perm == 'rw':
+ self.conduct_pos_test_for_write_caps()
+ elif perm == 'r':
+ self.conduct_neg_test_for_write_caps()
+ else:
+ raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".')
+
+ def conduct_pos_test_for_read_caps(self):
+ for mount, path, data in self.test_set:
+ log.info(f'test read perm: read file {path} and expect data '
+ f'"{data}"')
+ contents = mount.read_file(path)
+ self.assertEqual(data, contents)
+ log.info(f'read perm was tested successfully: "{data}" was '
+ f'successfully read from path {path}')
+
+ def conduct_pos_test_for_write_caps(self):
+ for mount, path, data in self.test_set:
+ log.info(f'test write perm: try writing data "{data}" to '
+ f'file {path}.')
+ mount.write_file(path=path, data=data)
+ contents = mount.read_file(path=path)
+ self.assertEqual(data, contents)
+ log.info(f'write perm was tested successfully: data '
+ f'"{data}" was successfully written to file "{path}".')
+
+ def conduct_neg_test_for_write_caps(self, sudo_write=False):
+ possible_errmsgs = ('permission denied', 'operation not permitted')
+ cmdargs = ['echo', 'some random data', Raw('|')]
+ cmdargs += ['sudo', 'tee'] if sudo_write else ['tee']
+
+ # don't use data, cmd args to write are set already above.
+ for mount, path, data in self.test_set:
+ log.info('test absence of write perm: expect failure '
+ f'writing data to file {path}.')
+ cmdargs.append(path)
+ mount.negtestcmd(args=cmdargs, retval=1, errmsgs=possible_errmsgs)
+ cmdargs.pop(-1)
+ log.info('absence of write perm was tested successfully: '
+ f'failed to write data to file {path}.')
diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
new file mode 100644
index 000000000..d2688929c
--- /dev/null
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -0,0 +1,442 @@
+import json
+import logging
+import os
+import re
+
+from shlex import split as shlex_split
+
+from tasks.ceph_test_case import CephTestCase
+
+from teuthology import contextutil
+from teuthology.orchestra import run
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+def classhook(m):
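+ """
+ Class decorator factory: when the decorated class is defined, call the
+ classmethod named by `m` on it once (e.g. to generate test methods
+ dynamically).
+ """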
+ def dec(cls):
+ getattr(cls, m)()
+ return cls
+ return dec
+
+def for_teuthology(f):
+ """
+ Decorator that adds an "is_for_teuthology" attribute to the wrapped function
+ """
+ f.is_for_teuthology = True
+ return f
+
+
+def needs_trimming(f):
+ """
+ Mark fn as requiring a client capable of trimming its cache (i.e. for ceph-fuse
+ this means it needs to be able to run as root, currently)
+ """
+ f.needs_trimming = True
+ return f
+
+
+class MountDetails():
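+ """
+ Snapshot of a mount object's identifying attributes, so that tests which
+ modify them can have the originals restored during tearDown.
+ """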
+
+ def __init__(self, mntobj):
+ self.client_id = mntobj.client_id
+ self.client_keyring_path = mntobj.client_keyring_path
+ self.client_remote = mntobj.client_remote
+ self.cephfs_name = mntobj.cephfs_name
+ self.cephfs_mntpt = mntobj.cephfs_mntpt
+ self.hostfs_mntpt = mntobj.hostfs_mntpt
+
+ def restore(self, mntobj):
+ mntobj.client_id = self.client_id
+ mntobj.client_keyring_path = self.client_keyring_path
+ mntobj.client_remote = self.client_remote
+ mntobj.cephfs_name = self.cephfs_name
+ mntobj.cephfs_mntpt = self.cephfs_mntpt
+ mntobj.hostfs_mntpt = self.hostfs_mntpt
+
+
+class CephFSTestCase(CephTestCase):
+ """
+ Test case for CephFS. Requires the caller to populate the fs, mount_a and
+ mount_b class attributes with a Filesystem and Mounts (mount_b is optional).
+
+ Handles resetting the cluster under test between tests.
+ """
+
+ # FIXME weird explicit naming
+ mount_a = None
+ mount_b = None
+ recovery_mount = None
+
+ # Declarative test requirements: subclasses should override these to indicate
+ # their special needs. If not met, tests will be skipped.
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+ REQUIRE_ONE_CLIENT_REMOTE = False
+
+ # Whether to create the default filesystem during setUp
+ REQUIRE_FILESYSTEM = True
+
+ # Create a backup filesystem if required.
+ # Requires REQUIRE_FILESYSTEM to be enabled.
+ REQUIRE_BACKUP_FILESYSTEM = False
+
+ LOAD_SETTINGS = [] # type: ignore
+
+ def _save_mount_details(self):
+ """
+ XXX: Tests may change details of mount objects, so let's stash them so
+ that these details are restored later to ensure smooth setUps and
+ tearDowns for upcoming tests.
+ """
+ self._orig_mount_details = [MountDetails(m) for m in self.mounts]
+ log.info(self._orig_mount_details)
+
+ def _remove_blocklist(self):
+ # In case anything is in the OSD blocklist, clear it out. This is to avoid
+ # the OSD map changing in the background (due to blocklist expiry) while tests run.
+ try:
+ self.mds_cluster.mon_manager.run_cluster_cmd(args="osd blocklist clear")
+ except CommandFailedError:
+ # Fallback for older Ceph cluster
+ try:
+ blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd",
+ "dump", "--format=json-pretty"))['blocklist']
+ log.info(f"Removing {len(blocklist)} blocklist entries")
+ for addr, blocklisted_at in blocklist.items():
+ self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr)
+ except KeyError:
+ # Fallback for even older Ceph clusters, which use 'blacklist' instead.
+ blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd",
+ "dump", "--format=json-pretty"))['blacklist']
+ log.info(f"Removing {len(blacklist)} blacklist entries")
+ for addr, blocklisted_at in blacklist.items():
+ self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr)
+
+ def setUp(self):
+ super(CephFSTestCase, self).setUp()
+
+ self.config_set('mon', 'mon_allow_pool_delete', True)
+
+ if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED:
+ self.skipTest("Only have {0} MDSs, require {1}".format(
+ len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED
+ ))
+
+ if len(self.mounts) < self.CLIENTS_REQUIRED:
+ self.skipTest("Only have {0} clients, require {1}".format(
+ len(self.mounts), self.CLIENTS_REQUIRED
+ ))
+
+ if self.REQUIRE_ONE_CLIENT_REMOTE:
+ if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames():
+ self.skipTest("Require first client to be on separate server from MDSs")
+
+ # Create friendly mount_a, mount_b attrs
+ for i in range(0, self.CLIENTS_REQUIRED):
+ setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i])
+
+ self.mds_cluster.clear_firewall()
+
+ # Unmount all clients, we are about to blow away the filesystem
+ for mount in self.mounts:
+ if mount.is_mounted():
+ mount.umount_wait(force=True)
+ self._save_mount_details()
+
+ # To avoid any issues with e.g. unlink bugs, we destroy and recreate
+ # the filesystem rather than just doing a rm -rf of files
+ self.mds_cluster.delete_all_filesystems()
+ self.mds_cluster.mds_restart() # to reset any run-time configs, etc.
+ self.fs = None # is now invalid!
+ self.backup_fs = None
+ self.recovery_fs = None
+
+ self._remove_blocklist()
+
+ client_mount_ids = [m.client_id for m in self.mounts]
+ # In case there were any extra auth identities around from a previous
+ # test, delete them
+ for entry in self.auth_list():
+ ent_type, ent_id = entry['entity'].split(".")
+ if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'):
+ self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity'])
+
+ if self.REQUIRE_FILESYSTEM:
+ self.fs = self.mds_cluster.newfs(create=True)
+
+ # In case some test messed with auth caps, reset them
+ for client_id in client_mount_ids:
+ cmd = ['auth', 'caps', f'client.{client_id}', 'mon','allow r',
+ 'osd', f'allow rw tag cephfs data={self.fs.name}',
+ 'mds', 'allow']
+
+ if self.run_cluster_cmd_result(cmd) == 0:
+ continue
+
+ cmd[1] = 'add'
+ if self.run_cluster_cmd_result(cmd) != 0:
+ raise RuntimeError(f'Failed to create new client {cmd[2]}')
+
+ # wait for ranks to become active
+ self.fs.wait_for_daemons()
+
+ # Mount the requested number of clients
+ for i in range(0, self.CLIENTS_REQUIRED):
+ self.mounts[i].mount_wait()
+
+ if self.REQUIRE_BACKUP_FILESYSTEM:
+ if not self.REQUIRE_FILESYSTEM:
+ self.skipTest("backup filesystem requires a primary filesystem as well")
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set',
+ 'enable_multiple', 'true',
+ '--yes-i-really-mean-it')
+ self.backup_fs = self.mds_cluster.newfs(name="backup_fs")
+ self.backup_fs.wait_for_daemons()
+
+ # Load any config settings of interest
+ for setting in self.LOAD_SETTINGS:
+ setattr(self, setting, float(self.fs.mds_asok(
+ ['config', 'get', setting], list(self.mds_cluster.mds_ids)[0]
+ )[setting]))
+
+ self.configs_set = set()
+
+ def tearDown(self):
+ self.mds_cluster.clear_firewall()
+ for m in self.mounts:
+ m.teardown()
+
+ # To prevent failover messages during unwind of the ceph task
+ self.mds_cluster.delete_all_filesystems()
+
+ for m, md in zip(self.mounts, self._orig_mount_details):
+ md.restore(m)
+
+ for subsys, key in self.configs_set:
+ self.mds_cluster.clear_ceph_conf(subsys, key)
+
+ return super(CephFSTestCase, self).tearDown()
+
+ def set_conf(self, subsys, key, value):
+ self.configs_set.add((subsys, key))
+ self.mds_cluster.set_ceph_conf(subsys, key, value)
+
+ def auth_list(self):
+ """
+ Convenience wrapper on "ceph auth ls"
+ """
+ return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd(
+ "auth", "ls", "--format=json-pretty"
+ ))['auth_dump']
+
+ def assert_session_count(self, expected, ls_data=None, mds_id=None):
+ if ls_data is None:
+ ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id)
+
+ alive_count = len([s for s in ls_data if s['state'] != 'killing'])
+
+ self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format(
+ expected, alive_count
+ ))
+
+ def assert_session_state(self, client_id, expected_state):
+ self.assertEqual(
+ self._session_by_id(
+ self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'],
+ expected_state)
+
+ def get_session_data(self, client_id):
+ return self._session_by_id(client_id)
+
+ def _session_list(self):
+ ls_data = self.fs.mds_asok(['session', 'ls'])
+ ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']]
+ return ls_data
+
+ def get_session(self, client_id, session_ls=None):
+ if session_ls is None:
+ session_ls = self.fs.mds_asok(['session', 'ls'])
+
+ return self._session_by_id(session_ls)[client_id]
+
+ def _session_by_id(self, session_ls):
+ return dict([(s['id'], s) for s in session_ls])
+
+ def perf_dump(self, rank=None, status=None):
+ return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
+
+ def wait_until_evicted(self, client_id, timeout=30):
+ def is_client_evicted():
+ ls = self._session_list()
+ for s in ls:
+ if s['id'] == client_id:
+ return False
+ return True
+ self.wait_until_true(is_client_evicted, timeout)
+
+ def wait_for_daemon_start(self, daemon_ids=None):
+ """
+ Wait until all the daemons appear in the FSMap, either assigned
+ MDS ranks or in the list of standbys
+ """
+ def get_daemon_names():
+ return [info['name'] for info in self.mds_cluster.status().get_all()]
+
+ if daemon_ids is None:
+ daemon_ids = self.mds_cluster.mds_ids
+
+ try:
+ self.wait_until_true(
+ lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids),
+ timeout=30
+ )
+ except RuntimeError:
+ log.warning("Timeout waiting for daemons {0}, while we have {1}".format(
+ daemon_ids, get_daemon_names()
+ ))
+ raise
+
+ def delete_mds_coredump(self, daemon_id):
+ # delete coredump file, otherwise teuthology.internal.coredump will
+ # catch it later and treat it as a failure.
+ core_pattern = self.mds_cluster.mds_daemons[daemon_id].remote.sh(
+ "sudo sysctl -n kernel.core_pattern")
+ core_dir = os.path.dirname(core_pattern.strip())
+ if core_dir: # Non-default core_pattern with a directory in it
+ # We have seen a core_pattern that looks like it's from teuthology's coredump
+ # task, so proceed to clear out the core file
+ if core_dir[0] == '|':
+ log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:]))
+ return
+
+ log.info("Clearing core from directory: {0}".format(core_dir))
+
+ # Verify that we see the expected single coredump
+ ls_output = self.mds_cluster.mds_daemons[daemon_id].remote.sh([
+ "cd", core_dir, run.Raw('&&'),
+ "sudo", "ls", run.Raw('|'), "sudo", "xargs", "file"
+ ])
+ cores = [l.partition(":")[0]
+ for l in ls_output.strip().split("\n")
+ if re.match(r'.*ceph-mds.* -i +{0}'.format(daemon_id), l)]
+
+ log.info("Enumerated cores: {0}".format(cores))
+ self.assertEqual(len(cores), 1)
+
+ log.info("Found core file {0}, deleting it".format(cores[0]))
+
+ self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[
+ "cd", core_dir, run.Raw('&&'), "sudo", "rm", "-f", cores[0]
+ ])
+ else:
+ log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)")
+
+ def _get_subtrees(self, status=None, rank=None, path=None):
+ if path is None:
+ path = "/"
+ try:
+ with contextutil.safe_while(sleep=1, tries=3) as proceed:
+ while proceed():
+ try:
+ if rank == "all":
+ subtrees = []
+ for r in self.fs.get_ranks(status=status):
+ s = self.fs.rank_asok(["get", "subtrees"], status=status, rank=r['rank'])
+ s = filter(lambda s: s['auth_first'] == r['rank'] and s['auth_second'] == -2, s)
+ subtrees += s
+ else:
+ subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank)
+ subtrees = filter(lambda s: s['dir']['path'].startswith(path), subtrees)
+ return list(subtrees)
+ except CommandFailedError as e:
+ # Sometimes we get transient errors
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ except contextutil.MaxWhileTries as e:
+ raise RuntimeError(f"could not get subtree state from rank {rank}") from e
+
+ def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None, path=None):
+ test = sorted(test)
+ try:
+ with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
+ while proceed():
+ subtrees = self._get_subtrees(status=status, rank=rank, path=path)
+ filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees])
+ log.info("%s =?= %s", filtered, test)
+ if filtered == test:
+ # Confirm export_pin in output is correct:
+ for s in subtrees:
+ if s['export_pin_target'] >= 0:
+ self.assertTrue(s['export_pin_target'] == s['auth_first'])
+ return subtrees
+ if action is not None:
+ action()
+ except contextutil.MaxWhileTries as e:
+ raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e
+
+ def _wait_until_scrub_complete(self, path="/", recursive=True, timeout=100):
+ out_json = self.fs.run_scrub(["start", path] + (["recursive"] if recursive else []))
+ if not self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"],
+ sleep=10, timeout=timeout):
+ log.info("timed out waiting for scrub to complete")
+
+ def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None):
+ try:
+ with contextutil.safe_while(sleep=5, tries=20) as proceed:
+ while proceed():
+ subtrees = self._get_subtrees(status=status, rank=rank, path=path)
+ subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == True and
+ s['auth_first'] == s['export_pin_target'],
+ subtrees))
+ log.info(f"len={len(subtrees)} {subtrees}")
+ if len(subtrees) >= count:
+ return subtrees
+ except contextutil.MaxWhileTries as e:
+ raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e
+
+ def _wait_random_subtrees(self, count, status=None, rank=None, path=None):
+ try:
+ with contextutil.safe_while(sleep=5, tries=20) as proceed:
+ while proceed():
+ subtrees = self._get_subtrees(status=status, rank=rank, path=path)
+ subtrees = list(filter(lambda s: s['random_ephemeral_pin'] == True and
+ s['auth_first'] == s['export_pin_target'],
+ subtrees))
+ log.info(f"len={len(subtrees)} {subtrees}")
+ if len(subtrees) >= count:
+ return subtrees
+ except contextutil.MaxWhileTries as e:
+ raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e
+
+ def run_cluster_cmd(self, cmd):
+ if isinstance(cmd, str):
+ cmd = shlex_split(cmd)
+ return self.fs.mon_manager.raw_cluster_cmd(*cmd)
+
+ def run_cluster_cmd_result(self, cmd):
+ if isinstance(cmd, str):
+ cmd = shlex_split(cmd)
+ return self.fs.mon_manager.raw_cluster_cmd_result(*cmd)
+
+ def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None):
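+ """
+ Create a client with the given MON/OSD/MDS caps; if no caps are
+ passed, authorize the client for 'rw' on the root of the default FS.
+
+ For example (hypothetical caps):
+     self.create_client('foo', moncap='allow r', mdscap='allow rw')
+ """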
+ if not (moncap or osdcap or mdscap):
+ if self.fs:
+ return self.fs.authorize(client_id, ('/', 'rw'))
+ else:
+ raise RuntimeError('no caps were passed and the default FS '
+ 'is not created yet to allow client auth '
+ 'for it.')
+
+ cmd = ['auth', 'add', f'client.{client_id}']
+ if moncap:
+ cmd += ['mon', moncap]
+ if osdcap:
+ cmd += ['osd', osdcap]
+ if mdscap:
+ cmd += ['mds', mdscap]
+
+ self.run_cluster_cmd(cmd)
+ return self.run_cluster_cmd(f'auth get client.{client_id}')
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
new file mode 100644
index 000000000..777ba8249
--- /dev/null
+++ b/qa/tasks/cephfs/filesystem.py
@@ -0,0 +1,1712 @@
+
+import json
+import logging
+from gevent import Greenlet
+import os
+import time
+import datetime
+import re
+import errno
+import random
+
+from io import BytesIO, StringIO
+from errno import EBUSY
+
+from teuthology.exceptions import CommandFailedError
+from teuthology import misc
+from teuthology.nuke import clear_firewall
+from teuthology.parallel import parallel
+from teuthology import contextutil
+from tasks.ceph_manager import write_conf
+from tasks import ceph_manager
+
+
+log = logging.getLogger(__name__)
+
+
+DAEMON_WAIT_TIMEOUT = 120
+ROOT_INO = 1
+
+class FileLayout(object):
+ def __init__(self, pool=None, pool_namespace=None, stripe_unit=None, stripe_count=None, object_size=None):
+ self.pool = pool
+ self.pool_namespace = pool_namespace
+ self.stripe_unit = stripe_unit
+ self.stripe_count = stripe_count
+ self.object_size = object_size
+
+ @classmethod
+ def load_from_ceph(cls, layout_str):
+ # TODO
+ pass
+
+ def items(self):
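+ """
+ Yield (name, value) pairs for every layout field that is set, e.g. a
+ layout with pool='cephfs_data' and stripe_count=2 yields
+ ("pool", "cephfs_data") and ("stripe_count", 2).
+ """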
+ if self.pool is not None:
+ yield ("pool", self.pool)
+ if self.pool_namespace:
+ yield ("pool_namespace", self.pool_namespace)
+ if self.stripe_unit is not None:
+ yield ("stripe_unit", self.stripe_unit)
+ if self.stripe_count is not None:
+ yield ("stripe_count", self.stripe_count)
+ if self.object_size is not None:
+ yield ("object_size", self.stripe_size)
+
+class ObjectNotFound(Exception):
+ def __init__(self, object_name):
+ self._object_name = object_name
+
+ def __str__(self):
+ return "Object not found: '{0}'".format(self._object_name)
+
+class FSMissing(Exception):
+ def __init__(self, ident):
+ self.ident = ident
+
+ def __str__(self):
+ return f"File system {self.ident} does not exist in the map"
+
+class FSStatus(object):
+ """
+ Operations on a snapshot of the FSMap.
+ """
+ def __init__(self, mon_manager, epoch=None):
+ self.mon = mon_manager
+ cmd = ["fs", "dump", "--format=json"]
+ if epoch is not None:
+ cmd.append(str(epoch))
+ self.map = json.loads(self.mon.raw_cluster_cmd(*cmd))
+
+ def __str__(self):
+ return json.dumps(self.map, indent = 2, sort_keys = True)
+
+ # Expose the fsmap for manual inspection.
+ def __getitem__(self, key):
+ """
+ Get a field from the fsmap.
+ """
+ return self.map[key]
+
+ def get_filesystems(self):
+ """
+ Iterator for all filesystems.
+ """
+ for fs in self.map['filesystems']:
+ yield fs
+
+ def get_all(self):
+ """
+ Iterator for all the mds_info components in the FSMap.
+ """
+ for info in self.map['standbys']:
+ yield info
+ for fs in self.map['filesystems']:
+ for info in fs['mdsmap']['info'].values():
+ yield info
+
+ def get_standbys(self):
+ """
+ Iterator for all standbys.
+ """
+ for info in self.map['standbys']:
+ yield info
+
+ def get_fsmap(self, fscid):
+ """
+ Get the fsmap for the given FSCID.
+ """
+ for fs in self.map['filesystems']:
+ if fscid is None or fs['id'] == fscid:
+ return fs
+ raise FSMissing(fscid)
+
+ def get_fsmap_byname(self, name):
+ """
+ Get the fsmap for the given file system name.
+ """
+ for fs in self.map['filesystems']:
+ if name is None or fs['mdsmap']['fs_name'] == name:
+ return fs
+ raise FSMissing(name)
+
+ def get_replays(self, fscid):
+ """
+ Get the standby:replay MDS for the given FSCID.
+ """
+ fs = self.get_fsmap(fscid)
+ for info in fs['mdsmap']['info'].values():
+ if info['state'] == 'up:standby-replay':
+ yield info
+
+ def get_ranks(self, fscid):
+ """
+ Get the ranks for the given FSCID.
+ """
+ fs = self.get_fsmap(fscid)
+ for info in fs['mdsmap']['info'].values():
+ if info['rank'] >= 0 and info['state'] != 'up:standby-replay':
+ yield info
+
+ def get_damaged(self, fscid):
+ """
+ Get the damaged ranks for the given FSCID.
+ """
+ fs = self.get_fsmap(fscid)
+ return fs['mdsmap']['damaged']
+
+ def get_rank(self, fscid, rank):
+ """
+ Get the rank for the given FSCID.
+ """
+ for info in self.get_ranks(fscid):
+ if info['rank'] == rank:
+ return info
+ raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
+
+ def get_mds(self, name):
+ """
+ Get the info for the given MDS name.
+ """
+ for info in self.get_all():
+ if info['name'] == name:
+ return info
+ return None
+
+ def get_mds_addr(self, name):
+ """
+ Return the instance addr as a string, like "10.214.133.138:6807\/10825"
+ """
+ info = self.get_mds(name)
+ if info:
+ return info['addr']
+ else:
+ log.warning(json.dumps(list(self.get_all()), indent=2)) # dump for debugging
+ raise RuntimeError("MDS id '{0}' not found in map".format(name))
+
+ def get_mds_addrs(self, name):
+ """
+ Return the instance addr as a string, like "[10.214.133.138:6807 10.214.133.138:6808]"
+ """
+ info = self.get_mds(name)
+ if info:
+ return [e['addr'] for e in info['addrs']['addrvec']]
+ else:
+ log.warn(json.dumps(list(self.get_all()), indent=2)) # dump for debugging
+ raise RuntimeError("MDS id '{0}' not found in map".format(name))
+
+ def get_mds_gid(self, gid):
+ """
+ Get the info for the given MDS gid.
+ """
+ for info in self.get_all():
+ if info['gid'] == gid:
+ return info
+ return None
+
+ def hadfailover(self, status):
+ """
+ Compares two statuses for mds failovers.
+ Returns True if there is a failover.
+ """
+ for fs in status.map['filesystems']:
+ for info in fs['mdsmap']['info'].values():
+ oldinfo = self.get_mds_gid(info['gid'])
+ if oldinfo is None or oldinfo['incarnation'] != info['incarnation']:
+ return True
+ #all matching
+ return False
+
+class CephCluster(object):
+ @property
+ def admin_remote(self):
+ first_mon = misc.get_first_mon(self._ctx, None)
+ (result,) = self._ctx.cluster.only(first_mon).remotes.keys()
+ return result
+
+ def __init__(self, ctx) -> None:
+ self._ctx = ctx
+ self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager'))
+
+ def get_config(self, key, service_type=None):
+ """
+ Get config from mon by default, or a specific service if caller asks for it
+ """
+ if service_type is None:
+ service_type = 'mon'
+
+ service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0]
+ return self.json_asok(['config', 'get', key], service_type, service_id)[key]
+
+ def set_ceph_conf(self, subsys, key, value):
+ if subsys not in self._ctx.ceph['ceph'].conf:
+ self._ctx.ceph['ceph'].conf[subsys] = {}
+ self._ctx.ceph['ceph'].conf[subsys][key] = value
+ write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they
+ # used a different config path this won't work.
+
+ def clear_ceph_conf(self, subsys, key):
+ del self._ctx.ceph['ceph'].conf[subsys][key]
+ write_conf(self._ctx)
+
+ def json_asok(self, command, service_type, service_id, timeout=None):
+ if timeout is None:
+ timeout = 300
+ command.insert(0, '--format=json')
+ proc = self.mon_manager.admin_socket(service_type, service_id, command, timeout=timeout)
+ response_data = proc.stdout.getvalue().strip()
+ if len(response_data) > 0:
+
+ def get_nonnumeric_values(value):
+ c = {"NaN": float("nan"), "Infinity": float("inf"),
+ "-Infinity": -float("inf")}
+ return c[value]
+
+ j = json.loads(response_data.replace('inf', 'Infinity'),
+ parse_constant=get_nonnumeric_values)
+ pretty = json.dumps(j, sort_keys=True, indent=2)
+ log.debug(f"_json_asok output\n{pretty}")
+ return j
+ else:
+ log.debug("_json_asok output empty")
+ return None
+
+ def is_addr_blocklisted(self, addr):
+ blocklist = json.loads(self.mon_manager.raw_cluster_cmd(
+ "osd", "dump", "--format=json"))['blocklist']
+ if addr in blocklist:
+ return True
+ log.warn(f'The address {addr} is not blocklisted')
+ return False
+
+
+class MDSCluster(CephCluster):
+ """
+ Collective operations on all the MDS daemons in the Ceph cluster. These
+ daemons may be in use by various Filesystems.
+
+ For the benefit of pre-multi-filesystem tests, this class is also
+ a parent of Filesystem. The correct way to use MDSCluster going forward is
+ as a separate instance outside of your (multiple) Filesystem instances.
+ """
+
+ def __init__(self, ctx):
+ super(MDSCluster, self).__init__(ctx)
+
+ @property
+ def mds_ids(self):
+ # do this dynamically because the list of ids may change periodically with cephadm
+ return list(misc.all_roles_of_type(self._ctx.cluster, 'mds'))
+
+ @property
+ def mds_daemons(self):
+ return dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids])
+
+ def _one_or_all(self, mds_id, cb, in_parallel=True):
+ """
+ Call a callback for a single named MDS, or for all.
+
+ Note that the parallelism here isn't for performance, it's to avoid being overly kind
+ to the cluster by waiting a graceful ssh-latency of time between doing things, and to
+ avoid being overly kind by executing them in a particular order. However, some actions
+ don't cope with being done in parallel, so it's optional (`in_parallel`)
+
+ :param mds_id: MDS daemon name, or None
+ :param cb: Callback taking single argument of MDS daemon name
+ :param in_parallel: whether to invoke callbacks concurrently (else one after the other)
+ """
+
+ if mds_id is None:
+ if in_parallel:
+ with parallel() as p:
+ for mds_id in self.mds_ids:
+ p.spawn(cb, mds_id)
+ else:
+ for mds_id in self.mds_ids:
+ cb(mds_id)
+ else:
+ cb(mds_id)
+
+ def get_config(self, key, service_type=None):
+ """
+ get_config specialization of service_type="mds"
+ """
+ if service_type != "mds":
+ return super(MDSCluster, self).get_config(key, service_type)
+
+ # Some tests stop MDS daemons, don't send commands to a dead one:
+ running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()]
+ service_id = random.sample(running_daemons, 1)[0]
+ return self.json_asok(['config', 'get', key], service_type, service_id)[key]
+
+ def mds_stop(self, mds_id=None):
+ """
+ Stop the MDS daemon process(es). If it held a rank, that rank
+ will eventually go laggy.
+ """
+ self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop())
+
+ def mds_fail(self, mds_id=None):
+ """
+ Inform MDSMonitor of the death of the daemon process(es). If it held
+ a rank, that rank will be relinquished.
+ """
+ self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_))
+
+ def mds_restart(self, mds_id=None):
+ self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart())
+
+ def mds_fail_restart(self, mds_id=None):
+ """
+ Variation on restart that includes marking MDSs as failed, so that doing this
+ operation followed by waiting for healthy daemon states guarantees that they
+ have gone down and come up, rather than potentially seeing the healthy states
+ that existed before the restart.
+ """
+ def _fail_restart(id_):
+ self.mds_daemons[id_].stop()
+ self.mon_manager.raw_cluster_cmd("mds", "fail", id_)
+ self.mds_daemons[id_].restart()
+
+ self._one_or_all(mds_id, _fail_restart)
+
+ def mds_signal(self, mds_id, sig, silent=False):
+ """
+ signal a MDS daemon
+ """
+ self.mds_daemons[mds_id].signal(sig, silent)
+
+ def mds_is_running(self, mds_id):
+ return self.mds_daemons[mds_id].running()
+
+ def newfs(self, name='cephfs', create=True):
+ return Filesystem(self._ctx, name=name, create=create)
+
+ def status(self, epoch=None):
+ return FSStatus(self.mon_manager, epoch)
+
+ def get_standby_daemons(self):
+ return set([s['name'] for s in self.status().get_standbys()])
+
+ def get_mds_hostnames(self):
+ result = set()
+ for mds_id in self.mds_ids:
+ mds_remote = self.mon_manager.find_remote('mds', mds_id)
+ result.add(mds_remote.hostname)
+
+ return list(result)
+
+ def set_clients_block(self, blocked, mds_id=None):
+ """
+ Block (using iptables) client communications to this MDS. Be careful: if
+ other services are running on this MDS, or other MDSs try to talk to this
+ MDS, their communications may also be blocked as collateral damage.
+
+ :param mds_id: Optional ID of MDS to block, default to all
+ :return:
+ """
+ da_flag = "-A" if blocked else "-D"
+
+ def set_block(_mds_id):
+ remote = self.mon_manager.find_remote('mds', _mds_id)
+ status = self.status()
+
+ addr = status.get_mds_addr(_mds_id)
+ ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups()
+
+ remote.run(
+ args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"])
+ remote.run(
+ args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"])
+
+ self._one_or_all(mds_id, set_block, in_parallel=False)
+
+ def set_inter_mds_block(self, blocked, mds_rank_1, mds_rank_2):
+ """
+ Block (using iptables) communications from a provided MDS to other MDSs.
+ Block all ports that an MDS uses for communication.
+
+ :param blocked: True to block the MDS, False otherwise
+ :param mds_rank_1: MDS rank
+ :param mds_rank_2: MDS rank
+ :return:
+ """
+ da_flag = "-A" if blocked else "-D"
+
+ def set_block(mds_ids):
+ status = self.status()
+
+ mds = mds_ids[0]
+ remote = self.mon_manager.find_remote('mds', mds)
+ addrs = status.get_mds_addrs(mds)
+ for addr in addrs:
+ ip_str, port_str = re.match("(.+):(.+)", addr).groups()
+ remote.run(
+ args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"], omit_sudo=False)
+
+
+ mds = mds_ids[1]
+ remote = self.mon_manager.find_remote('mds', mds)
+ addrs = status.get_mds_addrs(mds)
+ for addr in addrs:
+ ip_str, port_str = re.match("(.+):(.+)", addr).groups()
+ remote.run(
+ args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"], omit_sudo=False)
+ remote.run(
+ args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m",
+ "comment", "--comment", "teuthology"], omit_sudo=False)
+
+ self._one_or_all((mds_rank_1, mds_rank_2), set_block, in_parallel=False)
+
+ def clear_firewall(self):
+ clear_firewall(self._ctx)
+
+ def get_mds_info(self, mds_id):
+ return FSStatus(self.mon_manager).get_mds(mds_id)
+
+ def is_pool_full(self, pool_name):
+ pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools']
+ for pool in pools:
+ if pool['pool_name'] == pool_name:
+ return 'full' in pool['flags_names'].split(",")
+
+ raise RuntimeError("Pool not found '{0}'".format(pool_name))
+
+ def delete_all_filesystems(self):
+ """
+ Remove all filesystems that exist, and any pools in use by them.
+ """
+ for fs in self.status().get_filesystems():
+ Filesystem(ctx=self._ctx, fscid=fs['id']).destroy()
+
+ @property
+ def beacon_timeout(self):
+ """
+ Generate an acceptable timeout for the mons to drive some MDSMap change
+ because of missed beacons from some MDS. This involves looking up the
+ grace period in use by the mons and adding an acceptable buffer.
+ """
+
+ grace = float(self.get_config("mds_beacon_grace", service_type="mon"))
+ return grace*2+15
+
+
+class Filesystem(MDSCluster):
+
+ """
+ Generator for all Filesystems in the cluster.
+ """
+ @classmethod
+ def get_all_fs(cls, ctx):
+ mdsc = MDSCluster(ctx)
+ status = mdsc.status()
+ for fs in status.get_filesystems():
+ yield cls(ctx, fscid=fs['id'])
+
+ """
+ This object is for driving a CephFS filesystem. The MDS daemons driven by
+ MDSCluster may be shared with other Filesystems.
+ """
+ def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False):
+ super(Filesystem, self).__init__(ctx)
+
+ self.name = name
+ self.id = None
+ self.metadata_pool_name = None
+ self.data_pool_name = None
+ self.data_pools = None
+ self.fs_config = fs_config
+ self.ec_profile = fs_config.get('ec_profile')
+
+ client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client'))
+ self.client_id = client_list[0]
+ self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1]
+
+ if name is not None:
+ if fscid is not None:
+ raise RuntimeError("cannot specify fscid when creating fs")
+ if create and not self.legacy_configured():
+ self.create()
+ else:
+ if fscid is not None:
+ self.id = fscid
+ self.getinfo(refresh = True)
+
+ # Stash a reference to the first created filesystem on ctx, so
+ # that if someone drops to the interactive shell they can easily
+ # poke our methods.
+ if not hasattr(self._ctx, "filesystem"):
+ self._ctx.filesystem = self
+
+ def dead(self):
+ try:
+ return not bool(self.get_mds_map())
+ except FSMissing:
+ return True
+
+ def get_task_status(self, status_key):
+ return self.mon_manager.get_service_task_status("mds", status_key)
+
+ def getinfo(self, refresh = False):
+ status = self.status()
+ if self.id is not None:
+ fsmap = status.get_fsmap(self.id)
+ elif self.name is not None:
+ fsmap = status.get_fsmap_byname(self.name)
+ else:
+ fss = [fs for fs in status.get_filesystems()]
+ if len(fss) == 1:
+ fsmap = fss[0]
+ elif len(fss) == 0:
+ raise RuntimeError("no file system available")
+ else:
+ raise RuntimeError("more than one file system available")
+ self.id = fsmap['id']
+ self.name = fsmap['mdsmap']['fs_name']
+ self.get_pool_names(status = status, refresh = refresh)
+ return status
+
+ def reach_max_mds(self):
+ status = self.wait_for_daemons()
+ mds_map = self.get_mds_map(status=status)
+ assert(mds_map['in'] == list(range(0, mds_map['max_mds'])))
+
+ def reset(self):
+ self.mon_manager.raw_cluster_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it')
+
+ def fail(self):
+ self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name))
+
+ def set_flag(self, var, *args):
+ a = map(lambda x: str(x).lower(), args)
+ self.mon_manager.raw_cluster_cmd("fs", "flag", "set", var, *a)
+
+ def set_allow_multifs(self, yes=True):
+ self.set_flag("enable_multiple", yes)
+
+ def set_var(self, var, *args):
+ a = map(lambda x: str(x).lower(), args)
+ self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a)
+
+ def set_down(self, down=True):
+ self.set_var("down", str(down).lower())
+
+ def set_joinable(self, joinable=True):
+ self.set_var("joinable", joinable)
+
+ def set_max_mds(self, max_mds):
+ self.set_var("max_mds", "%d" % max_mds)
+
+ def set_session_timeout(self, timeout):
+ self.set_var("session_timeout", "%d" % timeout)
+
+ def set_allow_standby_replay(self, yes):
+ self.set_var("allow_standby_replay", yes)
+
+ def set_allow_new_snaps(self, yes):
+ self.set_var("allow_new_snaps", yes, '--yes-i-really-mean-it')
+
+ def set_bal_rank_mask(self, bal_rank_mask):
+ self.set_var("bal_rank_mask", bal_rank_mask)
+
+ def set_refuse_client_session(self, yes):
+ self.set_var("refuse_client_session", yes)
+
+ def compat(self, *args):
+ a = map(lambda x: str(x).lower(), args)
+ self.mon_manager.raw_cluster_cmd("fs", "compat", self.name, *a)
+
+ def add_compat(self, *args):
+ self.compat("add_compat", *args)
+
+ def add_incompat(self, *args):
+ self.compat("add_incompat", *args)
+
+ def rm_compat(self, *args):
+ self.compat("rm_compat", *args)
+
+ def rm_incompat(self, *args):
+ self.compat("rm_incompat", *args)
+
+ def required_client_features(self, *args, **kwargs):
+ c = ["fs", "required_client_features", self.name, *args]
+ return self.mon_manager.run_cluster_cmd(args=c, **kwargs)
+
+ # Since v15.1.0 the pg autoscale mode has been enabled by default,
+ # which lets the pg autoscale mode calculate the pg_num as needed.
+ # We set pg_num_min to 64 to make sure that the pg autoscale mode
+ # won't set the pg_num too low (see Tracker#45434).
+ pg_num = 64
+ pg_num_min = 64
+ target_size_ratio = 0.9
+ target_size_ratio_ec = 0.9
+
+ def create(self, recover=False, metadata_overlay=False):
+ if self.name is None:
+ self.name = "cephfs"
+ if self.metadata_pool_name is None:
+ self.metadata_pool_name = "{0}_metadata".format(self.name)
+ if self.data_pool_name is None:
+ data_pool_name = "{0}_data".format(self.name)
+ else:
+ data_pool_name = self.data_pool_name
+
+ # With an EC profile, the EC pool stores the file data, while a small
+ # amount of metadata for all files still goes to the primary data pool.
+ if not metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile:
+ self.target_size_ratio = 0.05
+
+ log.debug("Creating filesystem '{0}'".format(self.name))
+
+ try:
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.metadata_pool_name,
+ '--pg_num_min', str(self.pg_num_min))
+
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ data_pool_name, str(self.pg_num),
+ '--pg_num_min', str(self.pg_num_min),
+ '--target_size_ratio',
+ str(self.target_size_ratio))
+ except CommandFailedError as e:
+ if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.metadata_pool_name,
+ str(self.pg_num_min))
+
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ data_pool_name, str(self.pg_num),
+ str(self.pg_num_min))
+ else:
+ raise
+
+ args = ["fs", "new", self.name, self.metadata_pool_name, data_pool_name]
+ if recover:
+ args.append('--recover')
+ if metadata_overlay:
+ args.append('--allow-dangerous-metadata-overlay')
+ self.mon_manager.raw_cluster_cmd(*args)
+
+ if not recover:
+ if self.ec_profile and 'disabled' not in self.ec_profile:
+ ec_data_pool_name = data_pool_name + "_ec"
+ log.debug("EC profile is %s", self.ec_profile)
+ cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name]
+ cmd.extend(self.ec_profile)
+ self.mon_manager.raw_cluster_cmd(*cmd)
+ try:
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'create', ec_data_pool_name,
+ 'erasure', ec_data_pool_name,
+ '--pg_num_min', str(self.pg_num_min),
+ '--target_size_ratio', str(self.target_size_ratio_ec))
+ except CommandFailedError as e:
+ if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'create', ec_data_pool_name,
+ str(self.pg_num_min), 'erasure', ec_data_pool_name)
+ else:
+ raise
+ self.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'set',
+ ec_data_pool_name, 'allow_ec_overwrites', 'true')
+ self.add_data_pool(ec_data_pool_name, create=False)
+ self.check_pool_application(ec_data_pool_name)
+
+ self.run_client_payload(f"setfattr -n ceph.dir.layout.pool -v {ec_data_pool_name} . && getfattr -n ceph.dir.layout .")
+
+ self.check_pool_application(self.metadata_pool_name)
+ self.check_pool_application(data_pool_name)
+
+ # Turn off spurious standby count warnings from modifying max_mds in tests.
+ try:
+ self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0')
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise)
+ pass
+ else:
+ raise
+
+ if self.fs_config is not None:
+ log.debug(f"fs_config: {self.fs_config}")
+ max_mds = self.fs_config.get('max_mds', 1)
+ if max_mds > 1:
+ self.set_max_mds(max_mds)
+
+ standby_replay = self.fs_config.get('standby_replay', False)
+ self.set_allow_standby_replay(standby_replay)
+
+ # If absent will use the default value (60 seconds)
+ session_timeout = self.fs_config.get('session_timeout', 60)
+ if session_timeout != 60:
+ self.set_session_timeout(session_timeout)
+
+ if self.fs_config.get('subvols', None) is not None:
+ log.debug(f"Creating {self.fs_config.get('subvols')} subvols "
+ f"for filesystem '{self.name}'")
+ if not hasattr(self._ctx, "created_subvols"):
+ self._ctx.created_subvols = dict()
+
+ subvols = self.fs_config.get('subvols')
+ assert(isinstance(subvols, dict))
+ assert(isinstance(subvols['create'], int))
+ assert(subvols['create'] > 0)
+
+ for sv in range(0, subvols['create']):
+ sv_name = f'sv_{sv}'
+ self.mon_manager.raw_cluster_cmd(
+ 'fs', 'subvolume', 'create', self.name, sv_name,
+ self.fs_config.get('subvol_options', ''))
+
+ if self.name not in self._ctx.created_subvols:
+ self._ctx.created_subvols[self.name] = []
+
+ subvol_path = self.mon_manager.raw_cluster_cmd(
+ 'fs', 'subvolume', 'getpath', self.name, sv_name)
+ subvol_path = subvol_path.strip()
+ self._ctx.created_subvols[self.name].append(subvol_path)
+ else:
+ log.debug(f"Not Creating any subvols for filesystem '{self.name}'")
+
+
+ self.getinfo(refresh = True)
+
+ # wait pgs to be clean
+ self.mon_manager.wait_for_clean()
+
+ def run_client_payload(self, cmd):
+ # avoid circular dep by importing here:
+ from tasks.cephfs.fuse_mount import FuseMount
+
+ # Wait for the MDS daemons to be ready before mounting the
+ # ceph-fuse client in run_client_payload()
+ self.wait_for_daemons()
+
+ d = misc.get_testdir(self._ctx)
+ m = FuseMount(self._ctx, d, "admin", self.client_remote, cephfs_name=self.name)
+ m.mount_wait()
+ m.run_shell_payload(cmd)
+ m.umount_wait(require_clean=True)
+
+ def _remove_pool(self, name, **kwargs):
+ c = f'osd pool rm {name} {name} --yes-i-really-really-mean-it'
+ return self.mon_manager.ceph(c, **kwargs)
+
+ def rm(self, **kwargs):
+ c = f'fs rm {self.name} --yes-i-really-mean-it'
+ return self.mon_manager.ceph(c, **kwargs)
+
+ def remove_pools(self, data_pools):
+ self._remove_pool(self.get_metadata_pool_name())
+ for poolname in data_pools:
+ try:
+ self._remove_pool(poolname)
+ except CommandFailedError as e:
+ # EBUSY, this data pool is used by two metadata pools, let the
+ # 2nd pass delete it
+ if e.exitstatus == EBUSY:
+ pass
+ else:
+ raise
+
+ def destroy(self, reset_obj_attrs=True):
+ log.info(f'Destroying file system {self.name} and related pools')
+
+ if self.dead():
+ log.debug('already dead...')
+ return
+
+ data_pools = self.get_data_pool_names(refresh=True)
+
+ # make sure no MDSs are attached to given FS.
+ self.fail()
+ self.rm()
+
+ self.remove_pools(data_pools)
+
+ if reset_obj_attrs:
+ self.id = None
+ self.name = None
+ self.metadata_pool_name = None
+ self.data_pool_name = None
+ self.data_pools = None
+
+ def recreate(self):
+ self.destroy()
+
+ self.create()
+ self.getinfo(refresh=True)
+
+ def check_pool_application(self, pool_name):
+ osd_map = self.mon_manager.get_osd_dump_json()
+ for pool in osd_map['pools']:
+ if pool['pool_name'] == pool_name:
+ if "application_metadata" in pool:
+ if not "cephfs" in pool['application_metadata']:
+ raise RuntimeError("Pool {pool_name} does not name cephfs as application!".\
+ format(pool_name=pool_name))
+
+ def __del__(self):
+ if getattr(self._ctx, "filesystem", None) == self:
+ delattr(self._ctx, "filesystem")
+
+ def exists(self):
+ """
+ Whether a filesystem exists in the mon's filesystem list
+ """
+ fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty'))
+ return self.name in [fs['name'] for fs in fs_list]
+
+ def legacy_configured(self):
+ """
+ Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is
+ the case, the caller should avoid using Filesystem.create
+ """
+ try:
+ out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools')
+ pools = json.loads(out_text)
+ metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools]
+ if metadata_pool_exists:
+ self.metadata_pool_name = 'metadata'
+ except CommandFailedError as e:
+ # For use in upgrade tests, Ceph cuttlefish and earlier don't support
+ # structured output (--format) from the CLI.
+ if e.exitstatus == 22:
+ metadata_pool_exists = True
+ else:
+ raise
+
+ return metadata_pool_exists
+
+ def _df(self):
+ return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))
+
+ # may raise FSMissing
+ def get_mds_map(self, status=None):
+ if status is None:
+ status = self.status()
+ return status.get_fsmap(self.id)['mdsmap']
+
+ def get_var(self, var, status=None):
+ return self.get_mds_map(status=status)[var]
+
+ def set_dir_layout(self, mount, path, layout):
+ for name, value in layout.items():
+ mount.run_shell(args=["setfattr", "-n", "ceph.dir.layout."+name, "-v", str(value), path])
+
+ def add_data_pool(self, name, create=True):
+ if create:
+ try:
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name,
+ '--pg_num_min', str(self.pg_num_min))
+ except CommandFailedError as e:
+ if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option
+ self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name,
+ str(self.pg_num_min))
+ else:
+ raise
+ self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
+ self.get_pool_names(refresh = True)
+ for poolid, fs_name in self.data_pools.items():
+ if name == fs_name:
+ return poolid
+ raise RuntimeError("could not get just created pool '{0}'".format(name))
+
+ def get_pool_names(self, refresh = False, status = None):
+ if refresh or self.metadata_pool_name is None or self.data_pools is None:
+ if status is None:
+ status = self.status()
+ fsmap = status.get_fsmap(self.id)
+
+ osd_map = self.mon_manager.get_osd_dump_json()
+ id_to_name = {}
+ for p in osd_map['pools']:
+ id_to_name[p['pool']] = p['pool_name']
+
+ self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']]
+ self.data_pools = {}
+ for data_pool in fsmap['mdsmap']['data_pools']:
+ self.data_pools[data_pool] = id_to_name[data_pool]
+
+ def get_data_pool_name(self, refresh = False):
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ assert(len(self.data_pools) == 1)
+ return next(iter(self.data_pools.values()))
+
+ def get_data_pool_id(self, refresh = False):
+ """
+ Don't call this if you have multiple data pools
+ :return: integer
+ """
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ assert(len(self.data_pools) == 1)
+ return next(iter(self.data_pools.keys()))
+
+ def get_data_pool_names(self, refresh = False):
+ if refresh or self.data_pools is None:
+ self.get_pool_names(refresh = True)
+ return list(self.data_pools.values())
+
+ def get_metadata_pool_name(self):
+ return self.metadata_pool_name
+
+ def set_data_pool_name(self, name):
+ if self.id is not None:
+ raise RuntimeError("can't set filesystem name if its fscid is set")
+ self.data_pool_name = name
+
+ def get_pool_pg_num(self, pool_name):
+ pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+ pool_name, 'pg_num',
+ '--format=json-pretty'))
+ return int(pgs['pg_num'])
+
+ def get_namespace_id(self):
+ return self.id
+
+ def get_pool_df(self, pool_name):
+ """
+ Return a dict like:
+ {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0}
+ """
+ for pool_df in self._df()['pools']:
+ if pool_df['name'] == pool_name:
+ return pool_df['stats']
+
+ raise RuntimeError("Pool name '{0}' not found".format(pool_name))
+
+ def get_usage(self):
+ return self._df()['stats']['total_used_bytes']
+
+ def are_daemons_healthy(self, status=None, skip_max_mds_check=False):
+ """
+ Return true if all daemons are in one of active, standby, standby-replay, and
+ at least max_mds daemons are in 'active'.
+
+ Unlike most of Filesystem, this function is tolerant of new-style `fs`
+ commands being missing, because we are part of the ceph installation
+ process during upgrade suites, so must fall back to old style commands
+ when we get an EINVAL on a new style command.
+
+ :return:
+ """
+ # First, check to see that processes haven't exited with an error code
+ for mds in self._ctx.daemons.iter_daemons_of_role('mds'):
+ mds.check_status()
+
+ active_count = 0
+ mds_map = self.get_mds_map(status=status)
+
+ log.debug("are_daemons_healthy: mds map: {0}".format(mds_map))
+
+ for mds_id, mds_status in mds_map['info'].items():
+ if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
+ log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
+ return False
+ elif mds_status['state'] == 'up:active':
+ active_count += 1
+
+ log.debug("are_daemons_healthy: {0}/{1}".format(
+ active_count, mds_map['max_mds']
+ ))
+
+ if not skip_max_mds_check:
+ if active_count > mds_map['max_mds']:
+ log.debug("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map))
+ return False
+ elif active_count == mds_map['max_mds']:
+ # The MDSMap says these guys are active, but let's check they really are
+ for mds_id, mds_status in mds_map['info'].items():
+ if mds_status['state'] == 'up:active':
+ try:
+ daemon_status = self.mds_tell(["status"], mds_id=mds_status['name'])
+ except CommandFailedError as cfe:
+ if cfe.exitstatus == errno.EINVAL:
+ # Old version, can't do this check
+ continue
+ else:
+ # MDS not even running
+ return False
+
+ if daemon_status['state'] != 'up:active':
+ # MDS hasn't taken the latest map yet
+ return False
+
+ return True
+ else:
+ return False
+ else:
+ log.debug("are_daemons_healthy: skipping max_mds check")
+ return True
+
+ def get_daemon_names(self, state=None, status=None):
+ """
+ Return MDS daemon names of those daemons in the given state
+ :param state:
+ :return:
+ """
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['state'] == state or state is None:
+ result.append(mds_status['name'])
+
+ return result
+
+ def get_active_names(self, status=None):
+ """
+ Return MDS daemon names of those daemons holding ranks
+ in state up:active
+
+ :return: list of strings like ['a', 'b'], sorted by rank
+ """
+ return self.get_daemon_names("up:active", status=status)
+
+ def get_all_mds_rank(self, status=None):
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
+ result.append(mds_status['rank'])
+
+ return result
+
+ def get_rank(self, rank=None, status=None):
+ if status is None:
+ status = self.getinfo()
+ if rank is None:
+ rank = 0
+ return status.get_rank(self.id, rank)
+
+ def rank_restart(self, rank=0, status=None):
+ name = self.get_rank(rank=rank, status=status)['name']
+ self.mds_restart(mds_id=name)
+
+ def rank_signal(self, signal, rank=0, status=None):
+ name = self.get_rank(rank=rank, status=status)['name']
+ self.mds_signal(name, signal)
+
+ def rank_freeze(self, yes, rank=0):
+ self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower())
+
+ def rank_repaired(self, rank):
+ self.mon_manager.raw_cluster_cmd("mds", "repaired", "{}:{}".format(self.id, rank))
+
+ def rank_fail(self, rank=0):
+ self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank))
+
+ def rank_is_running(self, rank=0, status=None):
+ name = self.get_rank(rank=rank, status=status)['name']
+ return self.mds_is_running(name)
+
+ def get_ranks(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_ranks(self.id)
+
+ def get_damaged(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_damaged(self.id)
+
+ def get_replays(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_replays(self.id)
+
+ def get_replay(self, rank=0, status=None):
+ for replay in self.get_replays(status=status):
+ if replay['rank'] == rank:
+ return replay
+ return None
+
+ def get_rank_names(self, status=None):
+ """
+ Return MDS daemon names of those daemons holding a rank,
+ sorted by rank. This includes e.g. up:replay/reconnect
+ as well as active, but does not include standby or
+ standby-replay.
+ """
+ mdsmap = self.get_mds_map(status)
+ result = []
+ for mds_status in sorted(mdsmap['info'].values(),
+ key=lambda _: _['rank']):
+ if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
+ result.append(mds_status['name'])
+
+ return result
+
+ def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None):
+ """
+ Wait until all daemons are healthy
+ :return:
+ """
+
+ if timeout is None:
+ timeout = DAEMON_WAIT_TIMEOUT
+
+ if self.id is None:
+ status = self.getinfo(refresh=True)
+
+ if status is None:
+ status = self.status()
+
+ elapsed = 0
+ while True:
+ if self.are_daemons_healthy(status=status, skip_max_mds_check=skip_max_mds_check):
+ return status
+ else:
+ time.sleep(1)
+ elapsed += 1
+
+ if elapsed > timeout:
+ log.debug("status = {0}".format(status))
+ raise RuntimeError("Timed out waiting for MDS daemons to become healthy")
+
+ status = self.status()
+
+ def dencoder(self, obj_type, obj_blob):
+ args = [os.path.join(self._prefix, "ceph-dencoder"), 'type', obj_type, 'import', '-', 'decode', 'dump_json']
+ p = self.mon_manager.controller.run(args=args, stdin=BytesIO(obj_blob), stdout=BytesIO())
+ return p.stdout.getvalue()
+
+ def rados(self, *args, **kwargs):
+ """
+ Callout to rados CLI.
+ """
+
+ return self.mon_manager.do_rados(*args, **kwargs)
+
+ def radosm(self, *args, **kwargs):
+ """
+ Interact with the metadata pool via rados CLI.
+ """
+
+ return self.rados(*args, **kwargs, pool=self.get_metadata_pool_name())
+
+ def radosmo(self, *args, stdout=BytesIO(), **kwargs):
+ """
+ Interact with the metadata pool via rados CLI. Get the stdout.
+ """
+
+ return self.radosm(*args, **kwargs, stdout=stdout).stdout.getvalue()
+
+ def get_metadata_object(self, object_type, object_id):
+ """
+ Retrieve an object from the metadata pool, pass it through
+ ceph-dencoder to dump it to JSON, and return the decoded object.
+ """
+
+ o = self.radosmo(['get', object_id, '-'])
+ j = self.dencoder(object_type, o)
+ try:
+ return json.loads(j)
+ except (TypeError, ValueError):
+ log.error("Failed to decode JSON: '{0}'".format(j))
+ raise
+
+ def get_journal_version(self):
+ """
+ Read the JournalPointer and Journal::Header objects to learn the version of
+ encoding in use.
+ """
+ journal_pointer_object = '400.00000000'
+ journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object)
+ journal_ino = journal_pointer_dump['journal_pointer']['front']
+
+ journal_header_object = "{0:x}.00000000".format(journal_ino)
+ journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object)
+
+ version = journal_header_dump['journal_header']['stream_format']
+ log.debug("Read journal version {0}".format(version))
+
+ return version
+
+ def mds_asok(self, command, mds_id=None, timeout=None):
+ if mds_id is None:
+ return self.rank_asok(command, timeout=timeout)
+
+ return self.json_asok(command, 'mds', mds_id, timeout=timeout)
+
+ def mds_tell(self, command, mds_id=None):
+ if mds_id is None:
+ return self.rank_tell(command)
+
+ return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{mds_id}", *command))
+
+ def rank_asok(self, command, rank=0, status=None, timeout=None):
+ info = self.get_rank(rank=rank, status=status)
+ return self.json_asok(command, 'mds', info['name'], timeout=timeout)
+
+ def rank_tell(self, command, rank=0, status=None):
+ try:
+ out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command)
+ return json.loads(out)
+ except json.decoder.JSONDecodeError:
+ log.error("could not decode: {}".format(out))
+ raise
+
+ def ranks_tell(self, command, status=None):
+ if status is None:
+ status = self.status()
+ out = []
+ for r in status.get_ranks(self.id):
+ result = self.rank_tell(command, rank=r['rank'], status=status)
+ out.append((r['rank'], result))
+ return sorted(out)
+
+ def ranks_perf(self, f, status=None):
+ perf = self.ranks_tell(["perf", "dump"], status=status)
+ out = []
+ for rank, perf in perf:
+ out.append((rank, f(perf)))
+ return out
+
+ def read_cache(self, path, depth=None, rank=None):
+ cmd = ["dump", "tree", path]
+ if depth is not None:
+ cmd.append(depth.__str__())
+ result = self.rank_asok(cmd, rank=rank)
+ if result is None or len(result) == 0:
+ raise RuntimeError("Path not found in cache: {0}".format(path))
+
+ return result
+
+ def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None):
+ """
+ Block until the MDS reaches a particular state, or a failure condition
+ is met.
+
+        When there are multiple MDSs, succeed when exactly one MDS is in the
+ goal state, or fail when any MDS is in the reject state.
+
+ :param goal_state: Return once the MDS is in this state
+ :param reject: Fail if the MDS enters this state before the goal state
+ :param timeout: Fail if this many seconds pass before reaching goal
+ :return: number of seconds waited, rounded down to integer
+ """
+
+ started_at = time.time()
+ while True:
+ status = self.status()
+ if rank is not None:
+ try:
+ mds_info = status.get_rank(self.id, rank)
+ current_state = mds_info['state'] if mds_info else None
+ log.debug("Looked up MDS state for mds.{0}: {1}".format(rank, current_state))
+ except:
+ mdsmap = self.get_mds_map(status=status)
+ if rank in mdsmap['failed']:
+ log.debug("Waiting for rank {0} to come back.".format(rank))
+ current_state = None
+ else:
+ raise
+ elif mds_id is not None:
+ # mds_info is None if no daemon with this ID exists in the map
+ mds_info = status.get_mds(mds_id)
+ current_state = mds_info['state'] if mds_info else None
+ log.debug("Looked up MDS state for {0}: {1}".format(mds_id, current_state))
+ else:
+ # In general, look for a single MDS
+ states = [m['state'] for m in status.get_ranks(self.id)]
+ if [s for s in states if s == goal_state] == [goal_state]:
+ current_state = goal_state
+ elif reject in states:
+ current_state = reject
+ else:
+ current_state = None
+ log.debug("mapped states {0} to {1}".format(states, current_state))
+
+ elapsed = time.time() - started_at
+ if current_state == goal_state:
+ log.debug("reached state '{0}' in {1}s".format(current_state, elapsed))
+ return elapsed
+ elif reject is not None and current_state == reject:
+ raise RuntimeError("MDS in reject state {0}".format(current_state))
+ elif timeout is not None and elapsed > timeout:
+ log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id)))
+ raise RuntimeError(
+ "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format(
+ elapsed, goal_state, current_state
+ ))
+ else:
+ time.sleep(1)
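+    # Example (illustrative sketch; assumes a Filesystem instance `fs`):
+    #
+    #   # block until rank 0 reports up:active, or fail after five minutes
+    #   elapsed = fs.wait_for_state('up:active', rank=0, timeout=300)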
+
+ def _read_data_xattr(self, ino_no, xattr_name, obj_type, pool):
+ if pool is None:
+ pool = self.get_data_pool_name()
+
+ obj_name = "{0:x}.00000000".format(ino_no)
+
+ args = ["getxattr", obj_name, xattr_name]
+ try:
+ proc = self.rados(args, pool=pool, stdout=BytesIO())
+ except CommandFailedError as e:
+ log.error(e.__str__())
+ raise ObjectNotFound(obj_name)
+
+ obj_blob = proc.stdout.getvalue()
+ return json.loads(self.dencoder(obj_type, obj_blob).strip())
+
+ def _write_data_xattr(self, ino_no, xattr_name, data, pool=None):
+ """
+ Write to an xattr of the 0th data object of an inode. Will
+ succeed whether the object and/or xattr already exist or not.
+
+ :param ino_no: integer inode number
+ :param xattr_name: string name of the xattr
+ :param data: byte array data to write to the xattr
+ :param pool: name of data pool or None to use primary data pool
+ :return: None
+ """
+ if pool is None:
+ pool = self.get_data_pool_name()
+
+ obj_name = "{0:x}.00000000".format(ino_no)
+ args = ["setxattr", obj_name, xattr_name, data]
+ self.rados(args, pool=pool)
+
+ def read_symlink(self, ino_no, pool=None):
+ return self._read_data_xattr(ino_no, "symlink", "string_wrapper", pool)
+
+ def read_backtrace(self, ino_no, pool=None):
+ """
+ Read the backtrace from the data pool, return a dict in the format
+ given by inode_backtrace_t::dump, which is something like:
+
+ ::
+
+ rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin
+ ceph-dencoder type inode_backtrace_t import out.bin decode dump_json
+
+ { "ino": 1099511627778,
+ "ancestors": [
+ { "dirino": 1,
+ "dname": "blah",
+ "version": 11}],
+ "pool": 1,
+ "old_pools": []}
+
+ :param pool: name of pool to read backtrace from. If omitted, FS must have only
+ one data pool and that will be used.
+ """
+ return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool)
+
+ def read_layout(self, ino_no, pool=None):
+ """
+ Read 'layout' xattr of an inode and parse the result, returning a dict like:
+ ::
+ {
+ "stripe_unit": 4194304,
+ "stripe_count": 1,
+ "object_size": 4194304,
+ "pool_id": 1,
+ "pool_ns": "",
+ }
+
+ :param pool: name of pool to read backtrace from. If omitted, FS must have only
+ one data pool and that will be used.
+ """
+ return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool)
+
+ def _enumerate_data_objects(self, ino, size):
+ """
+ Get the list of expected data objects for a range, and the list of objects
+ that really exist.
+
+ :return a tuple of two lists of strings (expected, actual)
+ """
+ stripe_size = 1024 * 1024 * 4
+
+ size = max(stripe_size, size)
+
+ want_objects = [
+ "{0:x}.{1:08x}".format(ino, n)
+ for n in range(0, ((size - 1) // stripe_size) + 1)
+ ]
+
+ exist_objects = self.rados(["ls"], pool=self.get_data_pool_name(), stdout=StringIO()).stdout.getvalue().split("\n")
+
+ return want_objects, exist_objects
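+    # For example, with the 4 MiB stripe size assumed above, an 8 MiB file with
+    # inode 0x10000000001 is expected to map to the objects
+    # '10000000001.00000000' and '10000000001.00000001'.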
+
+ def data_objects_present(self, ino, size):
+ """
+ Check that *all* the expected data objects for an inode are present in the data pool
+ """
+
+ want_objects, exist_objects = self._enumerate_data_objects(ino, size)
+ missing = set(want_objects) - set(exist_objects)
+
+ if missing:
+ log.debug("Objects missing (ino {0}, size {1}): {2}".format(
+ ino, size, missing
+ ))
+ return False
+ else:
+ log.debug("All objects for ino {0} size {1} found".format(ino, size))
+ return True
+
+ def data_objects_absent(self, ino, size):
+ want_objects, exist_objects = self._enumerate_data_objects(ino, size)
+ present = set(want_objects) & set(exist_objects)
+
+ if present:
+ log.debug("Objects not absent (ino {0}, size {1}): {2}".format(
+ ino, size, present
+ ))
+ return False
+ else:
+ log.debug("All objects for ino {0} size {1} are absent".format(ino, size))
+ return True
+
+ def dirfrag_exists(self, ino, frag):
+ try:
+ self.radosm(["stat", "{0:x}.{1:08x}".format(ino, frag)])
+ except CommandFailedError:
+ return False
+ else:
+ return True
+
+ def list_dirfrag(self, dir_ino):
+ """
+ Read the named object and return the list of omap keys
+
+ :return a list of 0 or more strings
+ """
+
+ dirfrag_obj_name = "{0:x}.00000000".format(dir_ino)
+
+ try:
+ key_list_str = self.radosmo(["listomapkeys", dirfrag_obj_name], stdout=StringIO())
+ except CommandFailedError as e:
+ log.error(e.__str__())
+ raise ObjectNotFound(dirfrag_obj_name)
+
+ return key_list_str.strip().split("\n") if key_list_str else []
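+    # Example (illustrative sketch): inode 1 is the CephFS root directory, so
+    # its first dirfrag object is named '1.00000000':
+    #
+    #   dentries = fs.list_dirfrag(1)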
+
+ def get_meta_of_fs_file(self, dir_ino, obj_name, out):
+ """
+        Get metadata from the parent dirfrag to verify the correctness of the data
+        format encoded by the cephfs-meta-injection tool.
+        Warning: the splitting (fragmentation) of the directory is not considered here.
+ """
+
+ dirfrag_obj_name = "{0:x}.00000000".format(dir_ino)
+ try:
+ self.radosm(["getomapval", dirfrag_obj_name, obj_name+"_head", out])
+ except CommandFailedError as e:
+ log.error(e.__str__())
+ raise ObjectNotFound(dir_ino)
+
+ def erase_metadata_objects(self, prefix):
+ """
+ For all objects in the metadata pool matching the prefix,
+ erase them.
+
+ This O(N) with the number of objects in the pool, so only suitable
+ for use on toy test filesystems.
+ """
+ all_objects = self.radosmo(["ls"], stdout=StringIO()).strip().split("\n")
+ matching_objects = [o for o in all_objects if o.startswith(prefix)]
+ for o in matching_objects:
+ self.radosm(["rm", o])
+
+ def erase_mds_objects(self, rank):
+ """
+ Erase all the per-MDS objects for a particular rank. This includes
+ inotable, sessiontable, journal
+ """
+
+ def obj_prefix(multiplier):
+ """
+ MDS object naming conventions like rank 1's
+ journal is at 201.***
+ """
+ return "%x." % (multiplier * 0x100 + rank)
+
+ # MDS_INO_LOG_OFFSET
+ self.erase_metadata_objects(obj_prefix(2))
+ # MDS_INO_LOG_BACKUP_OFFSET
+ self.erase_metadata_objects(obj_prefix(3))
+ # MDS_INO_LOG_POINTER_OFFSET
+ self.erase_metadata_objects(obj_prefix(4))
+ # MDSTables & SessionMap
+ self.erase_metadata_objects("mds{rank:d}_".format(rank=rank))
+
+ @property
+ def _prefix(self):
+ """
+        Override this to set a different path prefix for locating tool binaries.
+ """
+ return ""
+
+ def _make_rank(self, rank):
+ return "{}:{}".format(self.name, rank)
+
+ def _run_tool(self, tool, args, rank=None, quiet=False):
+ # Tests frequently have [client] configuration that jacks up
+ # the objecter log level (unlikely to be interesting here)
+ # and does not set the mds log level (very interesting here)
+ if quiet:
+ base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1']
+ else:
+ base_args = [os.path.join(self._prefix, tool), '--debug-mds=20', '--debug-ms=1', '--debug-objecter=1']
+
+ if rank is not None:
+ base_args.extend(["--rank", "%s" % str(rank)])
+
+ t1 = datetime.datetime.now()
+ r = self.tool_remote.sh(script=base_args + args, stdout=StringIO()).strip()
+ duration = datetime.datetime.now() - t1
+ log.debug("Ran {0} in time {1}, result:\n{2}".format(
+ base_args + args, duration, r
+ ))
+ return r
+
+ @property
+ def tool_remote(self):
+ """
+        An arbitrary remote to use when invoking recovery tools. The cluster admin
+        remote is used because it'll definitely have keys with perms to access the
+        cephfs metadata pool. This is public
+ so that tests can use this remote to go get locally written output files from the tools.
+ """
+ return self.mon_manager.controller
+
+ def journal_tool(self, args, rank, quiet=False):
+ """
+ Invoke cephfs-journal-tool with the passed arguments for a rank, and return its stdout
+ """
+ fs_rank = self._make_rank(rank)
+ return self._run_tool("cephfs-journal-tool", args, fs_rank, quiet)
+
+ def meta_tool(self, args, rank, quiet=False):
+ """
+ Invoke cephfs-meta-injection with the passed arguments for a rank, and return its stdout
+ """
+ fs_rank = self._make_rank(rank)
+ return self._run_tool("cephfs-meta-injection", args, fs_rank, quiet)
+
+ def table_tool(self, args, quiet=False):
+ """
+ Invoke cephfs-table-tool with the passed arguments, and return its stdout
+ """
+ return self._run_tool("cephfs-table-tool", args, None, quiet)
+
+ def data_scan(self, args, quiet=False, worker_count=1):
+ """
+ Invoke cephfs-data-scan with the passed arguments, and return its stdout
+
+ :param worker_count: if greater than 1, multiple workers will be run
+ in parallel and the return value will be None
+ """
+
+ workers = []
+
+ for n in range(0, worker_count):
+ if worker_count > 1:
+ # data-scan args first token is a command, followed by args to it.
+ # insert worker arguments after the command.
+ cmd = args[0]
+ worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:]
+ else:
+ worker_args = args
+
+ workers.append(Greenlet.spawn(lambda wargs=worker_args:
+ self._run_tool("cephfs-data-scan", wargs, None, quiet)))
+
+ for w in workers:
+ w.get()
+
+ if worker_count == 1:
+ return workers[0].value
+ else:
+ return None
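+    # Illustrative sketch of how the recovery tests typically drive these tools
+    # (assumes a Filesystem instance `fs` whose MDSs are stopped or failed):
+    #
+    #   fs.journal_tool(['journal', 'inspect'], rank=0)
+    #   fs.data_scan(['scan_extents'], worker_count=4)
+    #   fs.data_scan(['scan_inodes'], worker_count=4)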
+
+ def is_full(self):
+ return self.is_pool_full(self.get_data_pool_name())
+
+ def authorize(self, client_id, caps=('/', 'rw')):
+ """
+ Run "ceph fs authorize" and run "ceph auth get" to get and returnt the
+ keyring.
+
+ client_id: client id that will be authorized
+ caps: tuple containing the path and permission (can be r or rw)
+ respectively.
+ """
+ if isinstance(caps[0], (tuple, list)):
+ x = []
+ for c in caps:
+ x.extend(c)
+ caps = tuple(x)
+
+ client_name = 'client.' + client_id
+ return self.mon_manager.raw_cluster_cmd('fs', 'authorize', self.name,
+ client_name, *caps)
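+    # Example (illustrative sketch; '/somedir' is a hypothetical path):
+    #
+    #   keyring = fs.authorize('foo', ('/', 'rw'))
+    #   keyring = fs.authorize('bar', (('/', 'r'), ('/somedir', 'rw')))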
+
+ def grow(self, new_max_mds, status=None):
+ oldmax = self.get_var('max_mds', status=status)
+ assert(new_max_mds > oldmax)
+ self.set_max_mds(new_max_mds)
+ return self.wait_for_daemons()
+
+ def shrink(self, new_max_mds, status=None):
+ oldmax = self.get_var('max_mds', status=status)
+ assert(new_max_mds < oldmax)
+ self.set_max_mds(new_max_mds)
+ return self.wait_for_daemons()
+
+ def run_scrub(self, cmd, rank=0):
+ return self.rank_tell(["scrub"] + cmd, rank)
+
+ def get_scrub_status(self, rank=0):
+ return self.run_scrub(["status"], rank)
+
+ def flush(self, rank=0):
+ return self.rank_tell(["flush", "journal"], rank=rank)
+
+ def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30,
+ timeout=300, reverse=False):
+ # time out after "timeout" seconds and assume as done
+ if result is None:
+ result = "no active scrubs running"
+ with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
+ while proceed():
+ out_json = self.rank_tell(["scrub", "status"], rank=rank)
+ assert out_json is not None
+ if not reverse:
+ if result in out_json['status']:
+ log.info("all active scrubs completed")
+ return True
+ else:
+ if result not in out_json['status']:
+ log.info("all active scrubs completed")
+ return True
+
+ if tag is not None:
+ status = out_json['scrubs'][tag]
+ if status is not None:
+ log.info(f"scrub status for tag:{tag} - {status}")
+ else:
+ log.info(f"scrub has completed for tag:{tag}")
+ return True
+
+ # timed out waiting for scrub to complete
+ return False
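+    # Example (illustrative sketch; assumes the scrub start output carries a
+    # 'scrub_tag' field, as the scrub tests elsewhere in this suite expect):
+    #
+    #   out_json = fs.run_scrub(['start', '/', 'recursive'])
+    #   assert fs.wait_until_scrub_complete(tag=out_json['scrub_tag'])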
+
+ def get_damage(self, rank=None):
+ if rank is None:
+ result = {}
+ for info in self.get_ranks():
+ rank = info['rank']
+ result[rank] = self.get_damage(rank=rank)
+ return result
+ else:
+ return self.rank_tell(['damage', 'ls'], rank=rank)
diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py
new file mode 100644
index 000000000..0b9b17403
--- /dev/null
+++ b/qa/tasks/cephfs/fuse_mount.py
@@ -0,0 +1,533 @@
+import json
+import time
+import logging
+
+from io import StringIO
+from textwrap import dedent
+
+from teuthology.contextutil import MaxWhileTries
+from teuthology.contextutil import safe_while
+from teuthology.orchestra import run
+from teuthology.exceptions import CommandFailedError
+from tasks.ceph_manager import get_valgrind_args
+from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT
+
+log = logging.getLogger(__name__)
+
+# Refer mount.py for docstrings.
+class FuseMount(CephFSMount):
+ def __init__(self, ctx, test_dir, client_id, client_remote,
+ client_keyring_path=None, cephfs_name=None,
+ cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None,
+ client_config={}):
+ super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir,
+ client_id=client_id, client_remote=client_remote,
+ client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
+ cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
+ client_config=client_config)
+
+ self.fuse_daemon = None
+ self._fuse_conn = None
+ self.id = None
+ self.inst = None
+ self.addr = None
+ self.mount_timeout = int(self.client_config.get('mount_timeout', 30))
+
+ self._mount_bin = [
+ 'ceph-fuse', "-f",
+ "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok"]
+ self._mount_cmd_cwd = self.test_dir
+ if self.client_config.get('valgrind') is not None:
+ self.cwd = None # get_valgrind_args chdir for us
+ self._mount_cmd_logger = log.getChild('ceph-fuse.{id}'.format(id=self.client_id))
+ self._mount_cmd_stdin = run.PIPE
+
+ def mount(self, mntopts=None, check_status=True, mntargs=None, **kwargs):
+ self.update_attrs(**kwargs)
+ self.assert_and_log_minimum_mount_details()
+
+ self.setup_netns()
+
+ try:
+ return self._mount(mntopts, mntargs, check_status)
+ except RuntimeError:
+ # Catch exceptions by the mount() logic (i.e. not remote command
+ # failures) and ensure the mount is not left half-up.
+ # Otherwise we might leave a zombie mount point that causes
+            # anyone traversing cephtest/ to hang.
+ log.warning("Trying to clean up after failed mount")
+ self.umount_wait(force=True)
+ raise
+
+ def _mount(self, mntopts, mntargs, check_status):
+ log.info("Client client.%s config is %s" % (self.client_id,
+ self.client_config))
+
+ self._create_mntpt()
+
+ retval = self._run_mount_cmd(mntopts, mntargs, check_status)
+ if retval:
+ return retval
+
+ self.gather_mount_info()
+
+ def _run_mount_cmd(self, mntopts, mntargs, check_status):
+ mount_cmd = self._get_mount_cmd(mntopts, mntargs)
+ mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
+
+ # Before starting ceph-fuse process, note the contents of
+ # /sys/fs/fuse/connections
+ pre_mount_conns = self._list_fuse_conns()
+ log.info("Pre-mount connections: {0}".format(pre_mount_conns))
+
+ self.fuse_daemon = self.client_remote.run(
+ args=mount_cmd,
+ cwd=self._mount_cmd_cwd,
+ logger=self._mount_cmd_logger,
+ stdin=self._mount_cmd_stdin,
+ stdout=mountcmd_stdout,
+ stderr=mountcmd_stderr,
+ wait=False
+ )
+
+ return self._wait_and_record_our_fuse_conn(
+ check_status, pre_mount_conns, mountcmd_stdout, mountcmd_stderr)
+
+ def _get_mount_cmd(self, mntopts, mntargs):
+ daemon_signal = 'kill'
+ if self.client_config.get('coverage') or \
+ self.client_config.get('valgrind') is not None:
+ daemon_signal = 'term'
+
+ mount_cmd = ['sudo', 'adjust-ulimits', 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=self.test_dir),
+ 'daemon-helper', daemon_signal]
+
+ mount_cmd = self._add_valgrind_args(mount_cmd)
+ mount_cmd = ['sudo'] + self._nsenter_args + mount_cmd
+
+ mount_cmd += self._mount_bin + [self.hostfs_mntpt]
+ if self.client_id:
+ mount_cmd += ['--id', self.client_id]
+ if self.client_keyring_path and self.client_id:
+ mount_cmd += ['-k', self.client_keyring_path]
+
+ self.validate_subvol_options()
+
+ if self.cephfs_mntpt:
+ mount_cmd += ["--client_mountpoint=" + self.cephfs_mntpt]
+
+ if self.cephfs_name:
+ mount_cmd += ["--client_fs=" + self.cephfs_name]
+ if mntopts:
+ mount_cmd.extend(('-o', ','.join(mntopts)))
+ if mntargs:
+ mount_cmd.extend(mntargs)
+
+ return mount_cmd
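+    # The assembled command line ends up roughly like this (sketch, with
+    # placeholders standing in for the per-test values):
+    #
+    #   sudo nsenter --net=/var/run/netns/<ns> sudo adjust-ulimits \
+    #       ceph-coverage <tdir>/archive/coverage daemon-helper kill \
+    #       ceph-fuse -f --admin-socket /var/run/ceph/$cluster-$name.$pid.asok \
+    #       <hostfs_mntpt> --id <client_id> -k <keyring> [-o <opts>]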
+
+ def _add_valgrind_args(self, mount_cmd):
+ if self.client_config.get('valgrind') is not None:
+ mount_cmd = get_valgrind_args(
+ self.test_dir,
+ 'client.{id}'.format(id=self.client_id),
+ mount_cmd,
+ self.client_config.get('valgrind'),
+ cd=False
+ )
+
+ return mount_cmd
+
+ def _list_fuse_conns(self):
+ conn_dir = "/sys/fs/fuse/connections"
+
+ self.client_remote.run(args=['sudo', 'modprobe', 'fuse'],
+ check_status=False)
+ self.client_remote.run(
+ args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir],
+ check_status=False, timeout=(30))
+
+ try:
+ ls_str = self.client_remote.sh("ls " + conn_dir,
+ stdout=StringIO(),
+ timeout=300).strip()
+ except CommandFailedError:
+ return []
+
+ if ls_str:
+ return [int(n) for n in ls_str.split("\n")]
+ else:
+ return []
+
+ def _wait_and_record_our_fuse_conn(self, check_status, pre_mount_conns,
+ mountcmd_stdout, mountcmd_stderr):
+ """
+ Wait for the connection reference to appear in /sys
+ """
+ waited = 0
+
+ post_mount_conns = self._list_fuse_conns()
+ while len(post_mount_conns) <= len(pre_mount_conns):
+ if self.fuse_daemon.finished:
+ # Did mount fail? Raise the CommandFailedError instead of
+ # hitting the "failed to populate /sys/" timeout
+ try:
+ self.fuse_daemon.wait()
+ except CommandFailedError as e:
+ log.info('mount command failed.')
+ if check_status:
+ raise
+ else:
+ return (e, mountcmd_stdout.getvalue(),
+ mountcmd_stderr.getvalue())
+ time.sleep(1)
+ waited += 1
+ if waited > self._fuse_conn_check_timeout:
+ raise RuntimeError(
+ "Fuse mount failed to populate/sys/ after {} "
+ "seconds".format(waited))
+ else:
+ post_mount_conns = self._list_fuse_conns()
+
+ log.info("Post-mount connections: {0}".format(post_mount_conns))
+
+ self._record_our_fuse_conn(pre_mount_conns, post_mount_conns)
+
+ @property
+ def _fuse_conn_check_timeout(self):
+ mount_wait = self.client_config.get('mount_wait', 0)
+ if mount_wait > 0:
+ log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait))
+ time.sleep(mount_wait)
+ timeout = int(self.client_config.get('mount_timeout', 30))
+ return timeout
+
+ def _record_our_fuse_conn(self, pre_mount_conns, post_mount_conns):
+ """
+ Record our fuse connection number so that we can use it when forcing
+ an unmount.
+ """
+ new_conns = list(set(post_mount_conns) - set(pre_mount_conns))
+ if len(new_conns) == 0:
+ raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns))
+ elif len(new_conns) > 1:
+ raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns))
+ else:
+ self._fuse_conn = new_conns[0]
+
+ def gather_mount_info(self):
+ status = self.admin_socket(['status'])
+ self.id = status['id']
+ self.client_pid = status['metadata']['pid']
+ try:
+ self.inst = status['inst_str']
+ self.addr = status['addr_str']
+ except KeyError:
+ sessions = self.fs.rank_asok(['session', 'ls'])
+ for s in sessions:
+ if s['id'] == self.id:
+ self.inst = s['inst']
+ self.addr = self.inst.split()[1]
+ if self.inst is None:
+ raise RuntimeError("cannot find client session")
+
+ def check_mounted_state(self):
+ proc = self.client_remote.run(
+ args=[
+ 'stat',
+ '--file-system',
+ '--printf=%T\n',
+ '--',
+ self.hostfs_mntpt,
+ ],
+ stdout=StringIO(),
+ stderr=StringIO(),
+ wait=False,
+ timeout=300
+ )
+ try:
+ proc.wait()
+ except CommandFailedError:
+ error = proc.stderr.getvalue()
+ if ("endpoint is not connected" in error
+ or "Software caused connection abort" in error):
+                # This happens if fuse is killed without unmount
+ log.warning("Found stale mount point at {0}".format(self.hostfs_mntpt))
+ return True
+ else:
+ # This happens if the mount directory doesn't exist
+ log.info('mount point does not exist: %s', self.hostfs_mntpt)
+ return False
+
+ fstype = proc.stdout.getvalue().rstrip('\n')
+ if fstype == 'fuseblk':
+ log.info('ceph-fuse is mounted on %s', self.hostfs_mntpt)
+ return True
+ else:
+ log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format(
+ fstype=fstype))
+ return False
+
+ def wait_until_mounted(self):
+ """
+ Check to make sure that fuse is mounted on mountpoint. If not,
+ sleep for 5 seconds and check again.
+ """
+
+ while not self.check_mounted_state():
+ # Even if it's not mounted, it should at least
+ # be running: catch simple failures where it has terminated.
+ assert not self.fuse_daemon.poll()
+
+ time.sleep(5)
+
+ # Now that we're mounted, set permissions so that the rest of the test
+ # will have unrestricted access to the filesystem mount.
+ for retry in range(10):
+ try:
+ stderr = StringIO()
+ self.client_remote.run(args=['sudo', 'chmod', '1777',
+ self.hostfs_mntpt],
+ timeout=300,
+ stderr=stderr, omit_sudo=False)
+ break
+ except run.CommandFailedError:
+ stderr = stderr.getvalue().lower()
+ if "read-only file system" in stderr:
+ break
+ elif "permission denied" in stderr:
+ time.sleep(5)
+ else:
+ raise
+
+ def _mountpoint_exists(self):
+ return self.client_remote.run(args=["ls", "-d", self.hostfs_mntpt],
+ check_status=False,
+ timeout=300).exitstatus == 0
+
+ def umount(self, cleanup=True):
+ """
+ umount() must not run cleanup() when it's called by umount_wait()
+ since "run.wait([self.fuse_daemon], timeout)" would hang otherwise.
+ """
+ if not self.is_mounted():
+ if cleanup:
+ self.cleanup()
+ return
+ if self.is_blocked():
+ self._run_umount_lf()
+ if cleanup:
+ self.cleanup()
+ return
+
+ try:
+ log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name))
+ stderr = StringIO()
+ self.client_remote.run(
+ args=['sudo', 'fusermount', '-u', self.hostfs_mntpt],
+ stderr=stderr, timeout=UMOUNT_TIMEOUT, omit_sudo=False)
+ except run.CommandFailedError:
+ if "mountpoint not found" in stderr.getvalue():
+ # This happens if the mount directory doesn't exist
+ log.info('mount point does not exist: %s', self.mountpoint)
+ elif "not mounted" in stderr.getvalue():
+            # This happens if the mount directory was already unmounted
+ log.info('mount point not mounted: %s', self.mountpoint)
+ else:
+ log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name))
+
+ self.client_remote.run(
+ args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof',
+ run.Raw(';'), 'ps', 'auxf'],
+ timeout=UMOUNT_TIMEOUT, omit_sudo=False)
+
+ # abort the fuse mount, killing all hung processes
+ if self._fuse_conn:
+ self.run_python(dedent("""
+ import os
+ path = "/sys/fs/fuse/connections/{0}/abort"
+ if os.path.exists(path):
+ open(path, "w").write("1")
+ """).format(self._fuse_conn))
+ self._fuse_conn = None
+
+ # make sure its unmounted
+ self._run_umount_lf()
+
+ self._fuse_conn = None
+ self.id = None
+ self.inst = None
+ self.addr = None
+ if cleanup:
+ self.cleanup()
+
+ def umount_wait(self, force=False, require_clean=False,
+ timeout=UMOUNT_TIMEOUT):
+ """
+ :param force: Complete cleanly even if the MDS is offline
+ """
+ if not (self.is_mounted() and self.fuse_daemon):
+ log.debug('ceph-fuse client.{id} is not mounted at {remote} '
+ '{mnt}'.format(id=self.client_id,
+ remote=self.client_remote,
+ mnt=self.hostfs_mntpt))
+ self.cleanup()
+ return
+
+ if force:
+ assert not require_clean # mutually exclusive
+
+ # When we expect to be forcing, kill the ceph-fuse process directly.
+ # This should avoid hitting the more aggressive fallback killing
+ # in umount() which can affect other mounts too.
+ self.fuse_daemon.stdin.close()
+
+ # However, we will still hit the aggressive wait if there is an ongoing
+ # mount -o remount (especially if the remount is stuck because MDSs
+ # are unavailable)
+
+ if self.is_blocked():
+ self._run_umount_lf()
+ self.cleanup()
+ return
+
+        # cleanup is set to False since cleanup must happen after umount is
+        # complete; otherwise the following call to run.wait hangs.
+ self.umount(cleanup=False)
+
+ try:
+ # Permit a timeout, so that we do not block forever
+ run.wait([self.fuse_daemon], timeout)
+
+ except MaxWhileTries:
+ log.error("process failed to terminate after unmount. This probably"
+ " indicates a bug within ceph-fuse.")
+ raise
+ except CommandFailedError:
+ if require_clean:
+ raise
+
+ self.cleanup()
+
+ def teardown(self):
+ """
+ Whatever the state of the mount, get it gone.
+ """
+ super(FuseMount, self).teardown()
+
+ self.umount()
+
+ if self.fuse_daemon and not self.fuse_daemon.finished:
+ self.fuse_daemon.stdin.close()
+ try:
+ self.fuse_daemon.wait()
+ except CommandFailedError:
+ pass
+
+ def _asok_path(self):
+ return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id)
+
+ @property
+ def _prefix(self):
+ return ""
+
+ def find_admin_socket(self):
+ pyscript = """
+import glob
+import re
+import os
+import subprocess
+
+def _find_admin_socket(client_name):
+ asok_path = "{asok_path}"
+ files = glob.glob(asok_path)
+ mountpoint = "{mountpoint}"
+
+ # Given a non-glob path, it better be there
+ if "*" not in asok_path:
+ assert(len(files) == 1)
+ return files[0]
+
+ for f in files:
+ pid = re.match(".*\.(\d+)\.asok$", f).group(1)
+ if os.path.exists("/proc/{{0}}".format(pid)):
+ with open("/proc/{{0}}/cmdline".format(pid), 'r') as proc_f:
+ contents = proc_f.read()
+ if mountpoint in contents:
+ return f
+ raise RuntimeError("Client socket {{0}} not found".format(client_name))
+
+print(_find_admin_socket("{client_name}"))
+""".format(
+ asok_path=self._asok_path(),
+ client_name="client.{0}".format(self.client_id),
+ mountpoint=self.mountpoint)
+
+ asok_path = self.run_python(pyscript, sudo=True)
+ log.info("Found client admin socket at {0}".format(asok_path))
+ return asok_path
+
+ def admin_socket(self, args):
+ asok_path = self.find_admin_socket()
+
+ # Query client ID from admin socket, wait 2 seconds
+ # and retry 10 times if it is not ready
+ with safe_while(sleep=2, tries=10) as proceed:
+ while proceed():
+ try:
+ p = self.client_remote.run(args=
+ ['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args,
+ stdout=StringIO(), stderr=StringIO(), wait=False,
+ timeout=300)
+ p.wait()
+ break
+ except CommandFailedError:
+ if "connection refused" in p.stderr.getvalue().lower():
+ pass
+
+ return json.loads(p.stdout.getvalue().strip())
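+    # Example (illustrative sketch; `mount` is a mounted FuseMount):
+    #
+    #   status = mount.admin_socket(['status'])
+    #   log.info('client global id: %s', status['id'])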
+
+ def get_global_id(self):
+ """
+ Look up the CephFS client ID for this mount
+ """
+ return self.admin_socket(['mds_sessions'])['id']
+
+ def get_global_inst(self):
+ """
+ Look up the CephFS client instance for this mount
+ """
+ return self.inst
+
+ def get_global_addr(self):
+ """
+ Look up the CephFS client addr for this mount
+ """
+ return self.addr
+
+ def get_client_pid(self):
+ """
+ return pid of ceph-fuse process
+ """
+ status = self.admin_socket(['status'])
+ return status['metadata']['pid']
+
+ def get_osd_epoch(self):
+ """
+ Return 2-tuple of osd_epoch, osd_epoch_barrier
+ """
+ status = self.admin_socket(['status'])
+ return status['osd_epoch'], status['osd_epoch_barrier']
+
+ def get_dentry_count(self):
+ """
+ Return 2-tuple of dentry_count, dentry_pinned_count
+ """
+ status = self.admin_socket(['status'])
+ return status['dentry_count'], status['dentry_pinned_count']
+
+ def set_cache_size(self, size):
+ return self.admin_socket(['config', 'set', 'client_cache_size', str(size)])
+
+ def get_op_read_count(self):
+ return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read']
diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py
new file mode 100644
index 000000000..89f6b6639
--- /dev/null
+++ b/qa/tasks/cephfs/kernel_mount.py
@@ -0,0 +1,394 @@
+import errno
+import json
+import logging
+import os
+import re
+
+from io import StringIO
+from textwrap import dedent
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra import run
+from teuthology.contextutil import MaxWhileTries
+
+from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT
+
+log = logging.getLogger(__name__)
+
+
+# internal metadata directory
+DEBUGFS_META_DIR = 'meta'
+
+class KernelMount(CephFSMount):
+ def __init__(self, ctx, test_dir, client_id, client_remote,
+ client_keyring_path=None, hostfs_mntpt=None,
+ cephfs_name=None, cephfs_mntpt=None, brxnet=None,
+ client_config={}):
+ super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir,
+ client_id=client_id, client_remote=client_remote,
+ client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt,
+ cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet,
+ client_config=client_config)
+
+ if client_config.get('debug', False):
+ self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"])
+ self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"])
+
+ self.dynamic_debug = self.client_config.get('dynamic_debug', False)
+ self.rbytes = self.client_config.get('rbytes', False)
+ self.snapdirname = client_config.get('snapdirname', '.snap')
+ self.syntax_style = self.client_config.get('syntax', 'v2')
+ self.inst = None
+ self.addr = None
+ self._mount_bin = ['adjust-ulimits', 'ceph-coverage', self.test_dir +\
+ '/archive/coverage', '/bin/mount', '-t', 'ceph']
+
+ def mount(self, mntopts=None, check_status=True, **kwargs):
+ self.update_attrs(**kwargs)
+ self.assert_and_log_minimum_mount_details()
+
+ self.setup_netns()
+
+ if not self.cephfs_mntpt:
+ self.cephfs_mntpt = '/'
+ if not self.cephfs_name:
+ self.cephfs_name = 'cephfs'
+
+ self._create_mntpt()
+
+ retval = self._run_mount_cmd(mntopts, check_status)
+ if retval:
+ return retval
+
+ self._set_filemode_on_mntpt()
+
+ if self.dynamic_debug:
+ kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}', 0)
+ if kmount_count == 0:
+ self.enable_dynamic_debug()
+ self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count + 1
+
+ try:
+ self.gather_mount_info()
+ except:
+            log.warning('failed to fetch mount info - tests depending on mount addr/inst may fail!')
+
+ def gather_mount_info(self):
+ self.id = self._get_global_id()
+ self.get_global_inst()
+ self.get_global_addr()
+
+ def _run_mount_cmd(self, mntopts, check_status):
+ mount_cmd = self._get_mount_cmd(mntopts)
+ mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO()
+
+ try:
+ self.client_remote.run(args=mount_cmd, timeout=300,
+ stdout=mountcmd_stdout,
+ stderr=mountcmd_stderr, omit_sudo=False)
+ except CommandFailedError as e:
+ log.info('mount command failed')
+ if check_status:
+ raise
+ else:
+ return (e, mountcmd_stdout.getvalue(),
+ mountcmd_stderr.getvalue())
+ log.info('mount command passed')
+
+ def _make_mount_cmd_old_or_new_style(self):
+ optd = {}
+ mnt_stx = ''
+
+ self.validate_subvol_options()
+
+ assert(self.cephfs_mntpt)
+ if self.syntax_style == 'v1':
+ mnt_stx = f':{self.cephfs_mntpt}'
+ if self.client_id:
+ optd['name'] = self.client_id
+ if self.cephfs_name:
+ optd['mds_namespace'] = self.cephfs_name
+ elif self.syntax_style == 'v2':
+ mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}'
+ else:
+ assert 0, f'invalid syntax style: {self.syntax_style}'
+ return (mnt_stx, optd)
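+    # For reference, the device strings built above look roughly like:
+    #
+    #   v1:  ':/some/path'             (plus -o name=<id>,mds_namespace=<fs>)
+    #   v2:  '<id>@.<fs>=/some/path'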
+
+ def _get_mount_cmd(self, mntopts):
+ opts = 'norequire_active_mds'
+ if self.client_keyring_path and self.client_id:
+ opts += ',secret=' + self.get_key_from_keyfile()
+ if self.config_path:
+ opts += ',conf=' + self.config_path
+ if self.rbytes:
+ opts += ",rbytes"
+ else:
+ opts += ",norbytes"
+ if self.snapdirname != '.snap':
+ opts += f',snapdirname={self.snapdirname}'
+
+ mount_cmd = ['sudo'] + self._nsenter_args
+ stx_opt = self._make_mount_cmd_old_or_new_style()
+ for opt_name, opt_val in stx_opt[1].items():
+ opts += f',{opt_name}={opt_val}'
+ if mntopts:
+ opts += ',' + ','.join(mntopts)
+ log.info(f'mounting using device: {stx_opt[0]}')
+ # do not fall-back to old-style mount (catch new-style
+ # mount syntax bugs in the kernel). exclude this config
+ # when using v1-style syntax, since old mount helpers
+ # (pre-quincy) would pass this option to the kernel.
+ if self.syntax_style != 'v1':
+ opts += ",nofallback"
+ mount_cmd += self._mount_bin + [stx_opt[0], self.hostfs_mntpt, '-v',
+ '-o', opts]
+ return mount_cmd
+
+ def umount(self, force=False):
+ if not self.is_mounted():
+ self.cleanup()
+ return
+
+ if self.is_blocked():
+ self._run_umount_lf()
+ self.cleanup()
+ return
+
+ log.debug('Unmounting client client.{id}...'.format(id=self.client_id))
+
+ try:
+ cmd=['sudo', 'umount', self.hostfs_mntpt]
+ if force:
+ cmd.append('-f')
+ self.client_remote.run(args=cmd, timeout=UMOUNT_TIMEOUT, omit_sudo=False)
+ except Exception as e:
+ log.debug('Killing processes on client.{id}...'.format(id=self.client_id))
+ self.client_remote.run(
+ args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof',
+ run.Raw(';'), 'ps', 'auxf'],
+ timeout=UMOUNT_TIMEOUT, omit_sudo=False)
+ raise e
+
+ if self.dynamic_debug:
+ kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}')
+ assert kmount_count
+ if kmount_count == 1:
+ self.disable_dynamic_debug()
+ self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count - 1
+
+ self.cleanup()
+
+ def umount_wait(self, force=False, require_clean=False,
+ timeout=UMOUNT_TIMEOUT):
+ """
+ Unlike the fuse client, the kernel client's umount is immediate
+ """
+ if not self.is_mounted():
+ self.cleanup()
+ return
+
+ try:
+ self.umount(force)
+ except (CommandFailedError, MaxWhileTries):
+ if not force:
+ raise
+
+ # force delete the netns and umount
+ self._run_umount_lf()
+ self.cleanup()
+
+ def wait_until_mounted(self):
+ """
+ Unlike the fuse client, the kernel client is up and running as soon
+ as the initial mount() function returns.
+ """
+ assert self.is_mounted()
+
+ def teardown(self):
+ super(KernelMount, self).teardown()
+ if self.is_mounted():
+ self.umount()
+
+ def _get_debug_dir(self):
+ """
+ Get the debugfs folder for this mount
+ """
+
+ cluster_name = 'ceph'
+ fsid = self.ctx.ceph[cluster_name].fsid
+
+ global_id = self._get_global_id()
+
+ return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}")
+
+ def read_debug_file(self, filename):
+ """
+ Read the debug file "filename", return None if the file doesn't exist.
+ """
+
+ path = os.path.join(self._get_debug_dir(), filename)
+
+ stdout = StringIO()
+ stderr = StringIO()
+ try:
+ self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60),
+ stdout=stdout, stderr=stderr)
+ return stdout.getvalue()
+ except CommandFailedError:
+ if 'no such file or directory' in stderr.getvalue().lower():
+ return errno.ENOENT
+ elif 'not a directory' in stderr.getvalue().lower():
+ return errno.ENOTDIR
+ elif 'permission denied' in stderr.getvalue().lower():
+ return errno.EACCES
+ raise
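+    # Example (illustrative sketch; `mount` is a mounted KernelMount):
+    #
+    #   sessions = mount.read_debug_file('mds_sessions')
+    #   status = mount.read_debug_file('status')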
+
+ def _get_global_id(self):
+ try:
+ p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO())
+ v = p.stdout.getvalue()
+ prefix = "client"
+ assert v.startswith(prefix)
+ return int(v[len(prefix):])
+ except CommandFailedError:
+ # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available.
+ log.debug("Falling back to messy global_id lookup via /sys...")
+
+ pyscript = dedent("""
+ import glob
+ import os
+ import json
+
+ def get_id_to_dir():
+ result = {}
+ for dir in glob.glob("/sys/kernel/debug/ceph/*"):
+ if os.path.basename(dir) == DEBUGFS_META_DIR:
+ continue
+ mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines()
+ global_id = mds_sessions_lines[0].split()[1].strip('"')
+ client_id = mds_sessions_lines[1].split()[1].strip('"')
+ result[client_id] = global_id
+ return result
+ print(json.dumps(get_id_to_dir()))
+ """)
+
+ output = self.client_remote.sh([
+ 'sudo', 'python3', '-c', pyscript
+ ], timeout=(5*60))
+ client_id_to_global_id = json.loads(output)
+
+ try:
+ return client_id_to_global_id[self.client_id]
+ except KeyError:
+ log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format(
+ self.client_id, ",".join(client_id_to_global_id.keys())
+ ))
+ raise
+
+ def _dynamic_debug_control(self, enable):
+ """
+ Write to dynamic debug control file.
+ """
+ if enable:
+ fdata = "module ceph +p"
+ else:
+ fdata = "module ceph -p"
+
+ self.run_shell_payload(f"""
+sudo modprobe ceph
+echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control
+""")
+
+ def enable_dynamic_debug(self):
+ """
+ Enable the dynamic debug.
+ """
+ self._dynamic_debug_control(True)
+
+ def disable_dynamic_debug(self):
+ """
+ Disable the dynamic debug.
+ """
+ self._dynamic_debug_control(False)
+
+ def get_global_id(self):
+ """
+ Look up the CephFS client ID for this mount, using debugfs.
+ """
+
+ assert self.is_mounted()
+
+ return self._get_global_id()
+
+ @property
+ def _global_addr(self):
+ if self.addr is not None:
+ return self.addr
+
+ # The first line of the "status" file's output will be something
+ # like:
+ # "instance: client.4297 (0)10.72.47.117:0/1148470933"
+ # What we need here is only the string "10.72.47.117:0/1148470933"
+ status = self.read_debug_file("status")
+ if status is None:
+ return None
+
+ instance = re.findall(r'instance:.*', status)[0]
+ self.addr = instance.split()[2].split(')')[1]
+        return self.addr
+
+ @property
+ def _global_inst(self):
+ if self.inst is not None:
+ return self.inst
+
+ client_gid = "client%d" % self.get_global_id()
+ self.inst = " ".join([client_gid, self._global_addr])
+ return self.inst
+
+ def get_global_inst(self):
+ """
+ Look up the CephFS client instance for this mount
+ """
+ return self._global_inst
+
+ def get_global_addr(self):
+ """
+ Look up the CephFS client addr for this mount
+ """
+ return self._global_addr
+
+ def get_osd_epoch(self):
+ """
+ Return 2-tuple of osd_epoch, osd_epoch_barrier
+ """
+ osd_map = self.read_debug_file("osdmap")
+ assert osd_map
+
+ lines = osd_map.split("\n")
+ first_line_tokens = lines[0].split()
+ epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3])
+
+ return epoch, barrier
+
+ def get_op_read_count(self):
+ stdout = StringIO()
+ stderr = StringIO()
+ try:
+ path = os.path.join(self._get_debug_dir(), "metrics/size")
+ self.run_shell(f"sudo stat {path}", stdout=stdout,
+ stderr=stderr, cwd=None)
+ buf = self.read_debug_file("metrics/size")
+ except CommandFailedError:
+ if 'no such file or directory' in stderr.getvalue().lower() \
+ or 'not a directory' in stderr.getvalue().lower():
+ try:
+ path = os.path.join(self._get_debug_dir(), "metrics")
+ self.run_shell(f"sudo stat {path}", stdout=stdout,
+ stderr=stderr, cwd=None)
+ buf = self.read_debug_file("metrics")
+ except CommandFailedError:
+ return errno.ENOENT
+ else:
+ return 0
+ return int(re.findall(r'read.*', buf)[0].split()[1])
diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py
new file mode 100644
index 000000000..4a8187406
--- /dev/null
+++ b/qa/tasks/cephfs/mount.py
@@ -0,0 +1,1570 @@
+import hashlib
+import json
+import logging
+import datetime
+import os
+import re
+import time
+
+from io import StringIO
+from contextlib import contextmanager
+from textwrap import dedent
+from IPy import IP
+
+from teuthology.contextutil import safe_while
+from teuthology.misc import get_file, write_file
+from teuthology.orchestra import run
+from teuthology.orchestra.run import Raw
+from teuthology.exceptions import CommandFailedError, ConnectionLostError
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+
+UMOUNT_TIMEOUT = 300
+
+
+class CephFSMount(object):
+ def __init__(self, ctx, test_dir, client_id, client_remote,
+ client_keyring_path=None, hostfs_mntpt=None,
+ cephfs_name=None, cephfs_mntpt=None, brxnet=None,
+ client_config=None):
+ """
+ :param test_dir: Global teuthology test dir
+ :param client_id: Client ID, the 'foo' in client.foo
+ :param client_keyring_path: path to keyring for given client_id
+ :param client_remote: Remote instance for the host where client will
+ run
+ :param hostfs_mntpt: Path to directory on the FS on which Ceph FS will
+ be mounted
+ :param cephfs_name: Name of Ceph FS to be mounted
+ :param cephfs_mntpt: Path to directory inside Ceph FS that will be
+ mounted as root
+ """
+ self.ctx = ctx
+ self.test_dir = test_dir
+
+ self._verify_attrs(client_id=client_id,
+ client_keyring_path=client_keyring_path,
+ hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name,
+ cephfs_mntpt=cephfs_mntpt)
+
+ if client_config is None:
+ client_config = {}
+ self.client_config = client_config
+
+ self.cephfs_name = cephfs_name
+ self.client_id = client_id
+ self.client_keyring_path = client_keyring_path
+ self.client_remote = client_remote
+ self.cluster_name = 'ceph' # TODO: use config['cluster']
+ self.fs = None
+
+ if cephfs_mntpt is None and client_config.get("mount_path"):
+ self.cephfs_mntpt = client_config.get("mount_path")
+ log.info(f"using client_config[\"cephfs_mntpt\"] = {self.cephfs_mntpt}")
+ else:
+ self.cephfs_mntpt = cephfs_mntpt
+ log.info(f"cephfs_mntpt = {self.cephfs_mntpt}")
+
+ if hostfs_mntpt is None and client_config.get("mountpoint"):
+ self.hostfs_mntpt = client_config.get("mountpoint")
+ log.info(f"using client_config[\"hostfs_mntpt\"] = {self.hostfs_mntpt}")
+ elif hostfs_mntpt is not None:
+ self.hostfs_mntpt = hostfs_mntpt
+ else:
+ self.hostfs_mntpt = os.path.join(self.test_dir, f'mnt.{self.client_id}')
+ self.hostfs_mntpt_dirname = os.path.basename(self.hostfs_mntpt)
+ log.info(f"hostfs_mntpt = {self.hostfs_mntpt}")
+
+ self._netns_name = None
+ self.nsid = -1
+ if brxnet is None:
+ self.ceph_brx_net = '192.168.0.0/16'
+ else:
+ self.ceph_brx_net = brxnet
+
+ self.test_files = ['a', 'b', 'c']
+
+ self.background_procs = []
+
+    # This will clean up stale netnses left behind by previously
+    # failed test cases.
+ @staticmethod
+ def cleanup_stale_netnses_and_bridge(remote):
+ p = remote.run(args=['ip', 'netns', 'list'],
+ stdout=StringIO(), timeout=(5*60))
+ p = p.stdout.getvalue().strip()
+
+ # Get the netns name list
+ netns_list = re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', p)
+
+ # Remove the stale netnses
+ for ns in netns_list:
+ ns_name = ns.split()[0]
+ args = ['sudo', 'ip', 'netns', 'delete', '{0}'.format(ns_name)]
+ try:
+ remote.run(args=args, timeout=(5*60), omit_sudo=False)
+ except Exception:
+ pass
+
+ # Remove the stale 'ceph-brx'
+ try:
+ args = ['sudo', 'ip', 'link', 'delete', 'ceph-brx']
+ remote.run(args=args, timeout=(5*60), omit_sudo=False)
+ except Exception:
+ pass
+
+ def _parse_netns_name(self):
+ self._netns_name = '-'.join(["ceph-ns",
+ re.sub(r'/+', "-", self.mountpoint)])
+
+ @property
+ def mountpoint(self):
+ if self.hostfs_mntpt is None:
+ self.hostfs_mntpt = os.path.join(self.test_dir,
+ self.hostfs_mntpt_dirname)
+ return self.hostfs_mntpt
+
+ @mountpoint.setter
+ def mountpoint(self, path):
+ if not isinstance(path, str):
+ raise RuntimeError('path should be of str type.')
+ self._mountpoint = self.hostfs_mntpt = path
+
+ @property
+ def netns_name(self):
+        if self._netns_name is None:
+ self._parse_netns_name()
+ return self._netns_name
+
+ @netns_name.setter
+ def netns_name(self, name):
+ self._netns_name = name
+
+ def assert_that_ceph_fs_exists(self):
+ output = self.ctx.managers[self.cluster_name].raw_cluster_cmd("fs", "ls")
+ if self.cephfs_name:
+ assert self.cephfs_name in output, \
+ 'expected ceph fs is not present on the cluster'
+ log.info(f'Mounting Ceph FS {self.cephfs_name}; just confirmed its presence on cluster')
+ else:
+ assert 'No filesystems enabled' not in output, \
+ 'ceph cluster has no ceph fs, not even the default ceph fs'
+ log.info('Mounting default Ceph FS; just confirmed its presence on cluster')
+
+ def assert_and_log_minimum_mount_details(self):
+ """
+ Make sure we have minimum details required for mounting. Ideally, this
+ method should be called at the beginning of the mount method.
+ """
+ if not self.client_id or not self.client_remote or \
+ not self.hostfs_mntpt:
+ log.error(f"self.client_id = {self.client_id}")
+ log.error(f"self.client_remote = {self.client_remote}")
+ log.error(f"self.hostfs_mntpt = {self.hostfs_mntpt}")
+            errmsg = ('Mounting CephFS requires at least the following '
+                      'details to be provided -\n'
+ '1. the client ID,\n2. the mountpoint and\n'
+ '3. the remote machine where CephFS will be mounted.\n')
+ raise RuntimeError(errmsg)
+
+ self.assert_that_ceph_fs_exists()
+
+ log.info('Mounting Ceph FS. Following are details of mount; remember '
+ '"None" represents Python type None -')
+ log.info(f'self.client_remote.hostname = {self.client_remote.hostname}')
+ log.info(f'self.client.name = client.{self.client_id}')
+ log.info(f'self.hostfs_mntpt = {self.hostfs_mntpt}')
+ log.info(f'self.cephfs_name = {self.cephfs_name}')
+ log.info(f'self.cephfs_mntpt = {self.cephfs_mntpt}')
+ log.info(f'self.client_keyring_path = {self.client_keyring_path}')
+ if self.client_keyring_path:
+ log.info('keyring content -\n' +
+ get_file(self.client_remote, self.client_keyring_path,
+ sudo=True).decode())
+
+ def is_blocked(self):
+ if not self.addr:
+ # can't infer if our addr is blocklisted - let the caller try to
+ # umount without lazy/force. If the client was blocklisted, then
+ # the umount would be stuck and the test would fail on timeout.
+ # happens only with Ubuntu 20.04 (missing kclient patches :/).
+ return False
+ self.fs = Filesystem(self.ctx, name=self.cephfs_name)
+
+ try:
+ output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls')
+ except CommandFailedError:
+ # Fallback for older Ceph cluster
+ output = self.fs.mon_manager.raw_cluster_cmd(args='osd blacklist ls')
+
+ return self.addr in output
+
+ def is_stuck(self):
+ """
+ Check if mount is stuck/in a hanged state.
+ """
+ if not self.is_mounted():
+ return False
+
+ retval = self.client_remote.run(args=f'sudo stat {self.hostfs_mntpt}',
+ omit_sudo=False, wait=False).returncode
+ if retval == 0:
+ return False
+
+ time.sleep(10)
+ proc = self.client_remote.run(args='ps -ef', stdout=StringIO())
+ # if proc was running even after 10 seconds, it has to be stuck.
+ if f'stat {self.hostfs_mntpt}' in proc.stdout.getvalue():
+            log.critical(f'client mounted at {self.hostfs_mntpt} is stuck!')
+ return True
+ return False
+
+ def is_mounted(self):
+ file = self.client_remote.read_file('/proc/self/mounts',stdout=StringIO())
+ if self.hostfs_mntpt in file:
+ return True
+ else:
+ log.debug(f"not mounted; /proc/self/mounts is:\n{file}")
+ return False
+
+ def setupfs(self, name=None):
+ if name is None and self.fs is not None:
+ # Previous mount existed, reuse the old name
+ name = self.fs.name
+ self.fs = Filesystem(self.ctx, name=name)
+ log.info('Wait for MDS to reach steady state...')
+ self.fs.wait_for_daemons()
+ log.info('Ready to start {}...'.format(type(self).__name__))
+
+ def _create_mntpt(self):
+ self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}',
+ timeout=60)
+ # Use 0000 mode to prevent undesired modifications to the mountpoint on
+ # the local file system.
+ self.client_remote.run(args=f'chmod 0000 {self.hostfs_mntpt}',
+ timeout=60)
+
+ @property
+ def _nsenter_args(self):
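+ # Prefix prepended to commands that must run inside this mount's
+ # network namespace, e.g. ['nsenter', '--net=/var/run/netns/<name>', ...].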
+ return ['nsenter', f'--net=/var/run/netns/{self.netns_name}']
+
+ def _set_filemode_on_mntpt(self):
+ stderr = StringIO()
+ try:
+ self.client_remote.run(
+ args=['sudo', 'chmod', '1777', self.hostfs_mntpt],
+ stderr=stderr, timeout=(5*60))
+ except CommandFailedError:
+ # the client does not have write permissions in the caps it holds
+ # for the Ceph FS that was just mounted.
+ if 'permission denied' in stderr.getvalue().lower():
+ pass
+
+ def _setup_brx_and_nat(self):
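+ # Build (or reuse) a host bridge 'ceph-brx' holding the last usable IP
+ # of self.ceph_brx_net, enable IPv4 forwarding and add iptables
+ # FORWARD/MASQUERADE rules so the per-mount network namespaces can
+ # reach the cluster through the default route's interface.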
+ # The ip for ceph-brx should be the last valid (usable) address in self.ceph_brx_net
+ ip = IP(self.ceph_brx_net)[-2]
+ mask = self.ceph_brx_net.split('/')[1]
+ brd = IP(self.ceph_brx_net).broadcast()
+
+ brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(),
+ stdout=StringIO(), timeout=(5*60))
+ brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
+ if brx:
+ # If the 'ceph-brx' already exists, then check whether
+ # the new net is conflicting with it
+ _ip, _mask = brx[0].split()[1].split('/', 1)
+ if _ip != "{}".format(ip) or _mask != mask:
+ raise RuntimeError("Conflict with existing ceph-brx {0}, new {1}/{2}".format(brx[0].split()[1], ip, mask))
+
+ # Setup the ceph-brx and always use the last valid IP
+ if not brx:
+ log.info("Setuping the 'ceph-brx' with {0}/{1}".format(ip, mask))
+
+ self.run_shell_payload(f"""
+ set -e
+ sudo ip link add name ceph-brx type bridge
+ sudo ip addr flush dev ceph-brx
+ sudo ip link set ceph-brx up
+ sudo ip addr add {ip}/{mask} brd {brd} dev ceph-brx
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ args = "echo 1 | sudo tee /proc/sys/net/ipv4/ip_forward"
+ self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False)
+
+ # Setup the NAT
+ p = self.client_remote.run(args=['route'], stderr=StringIO(),
+ stdout=StringIO(), timeout=(5*60))
+ p = re.findall(r'default .*', p.stdout.getvalue())
+ if not p:
+ raise RuntimeError("No default gw found")
+ gw = p[0].split()[7]
+
+ self.run_shell_payload(f"""
+ set -e
+ sudo iptables -A FORWARD -o {gw} -i ceph-brx -j ACCEPT
+ sudo iptables -A FORWARD -i {gw} -o ceph-brx -j ACCEPT
+ sudo iptables -t nat -A POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ def _setup_netns(self):
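+ # Create (or resume) the netns for this mount: pick a free nsid and an
+ # unused IP from self.ceph_brx_net, then wire up a veth pair with veth0
+ # inside the netns and brx.<nsid> enslaved to the 'ceph-brx' bridge.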
+ p = self.client_remote.run(args=['ip', 'netns', 'list'],
+ stderr=StringIO(), stdout=StringIO(),
+ timeout=(5*60)).stdout.getvalue().strip()
+
+ # Get the netns name list
+ netns_list = re.findall(r'[^()\s][-.\w]+[^():\s]', p)
+
+ out = re.search(r"{0}".format(self.netns_name), p)
+ if out is None:
+ # Get a unique nsid for the new netns
+ nsid = 0
+ p = self.client_remote.run(args=['ip', 'netns', 'list-id'],
+ stderr=StringIO(), stdout=StringIO(),
+ timeout=(5*60)).stdout.getvalue()
+ while True:
+ out = re.search(r"nsid {} ".format(nsid), p)
+ if out is None:
+ break
+
+ nsid += 1
+
+ # Add a new netns and set its id
+ self.run_shell_payload(f"""
+ set -e
+ sudo ip netns add {self.netns_name}
+ sudo ip netns set {self.netns_name} {nsid}
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+ self.nsid = nsid
+ else:
+ # The netns already exists and may have been suspended by self.kill()
+ self.resume_netns()
+
+ nsid = int(re.search(r"{0} \(id: (\d+)\)".format(self.netns_name), p).group(1))
+ self.nsid = nsid
+ return
+
+ # Get one ip address for netns
+ ips = IP(self.ceph_brx_net)
+ for ip in ips:
+ found = False
+ if ip == ips[0]:
+ continue
+ if ip == ips[-2]:
+ raise RuntimeError("we have ran out of the ip addresses")
+
+ for ns in netns_list:
+ ns_name = ns.split()[0]
+ args = ['sudo', 'ip', 'netns', 'exec', '{0}'.format(ns_name), 'ip', 'addr']
+ try:
+ p = self.client_remote.run(args=args, stderr=StringIO(),
+ stdout=StringIO(), timeout=(5*60),
+ omit_sudo=False)
+ q = re.search("{0}".format(ip), p.stdout.getvalue())
+ if q is not None:
+ found = True
+ break
+ except CommandFailedError:
+ if "No such file or directory" in p.stderr.getvalue():
+ pass
+ if "Invalid argument" in p.stderr.getvalue():
+ pass
+
+ if not found:
+ break
+
+ mask = self.ceph_brx_net.split('/')[1]
+ brd = IP(self.ceph_brx_net).broadcast()
+
+ log.info("Setuping the netns '{0}' with {1}/{2}".format(self.netns_name, ip, mask))
+
+ # Setup the veth interfaces
+ brxip = IP(self.ceph_brx_net)[-2]
+ self.run_shell_payload(f"""
+ set -e
+ sudo ip link add veth0 netns {self.netns_name} type veth peer name brx.{nsid}
+ sudo ip netns exec {self.netns_name} ip addr add {ip}/{mask} brd {brd} dev veth0
+ sudo ip netns exec {self.netns_name} ip link set veth0 up
+ sudo ip netns exec {self.netns_name} ip link set lo up
+ sudo ip netns exec {self.netns_name} ip route add default via {brxip}
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ # Bring up the brx interface and join it to 'ceph-brx'
+ self.run_shell_payload(f"""
+ set -e
+ sudo ip link set brx.{nsid} up
+ sudo ip link set dev brx.{nsid} master ceph-brx
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ def _cleanup_netns(self):
+ if self.nsid == -1:
+ return
+ log.info("Removing the netns '{0}'".format(self.netns_name))
+
+ # Delete the netns and the peer veth interface
+ self.run_shell_payload(f"""
+ set -e
+ sudo ip link set brx.{self.nsid} down
+ sudo ip link delete dev brx.{self.nsid}
+ sudo ip netns delete {self.netns_name}
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ self.nsid = -1
+
+ def _cleanup_brx_and_nat(self):
+ brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(),
+ stdout=StringIO(), timeout=(5*60))
+ brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue())
+ if not brx:
+ return
+
+ # If we are the last netns, will delete the ceph-brx
+ args = ['sudo', 'ip', 'link', 'show']
+ p = self.client_remote.run(args=args, stdout=StringIO(),
+ timeout=(5*60), omit_sudo=False)
+ _list = re.findall(r'brx\.', p.stdout.getvalue().strip())
+ if len(_list) != 0:
+ return
+
+ log.info("Removing the 'ceph-brx'")
+
+ self.run_shell_payload("""
+ set -e
+ sudo ip link set ceph-brx down
+ sudo ip link delete ceph-brx
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ # Drop the iptables NAT rules
+ ip = IP(self.ceph_brx_net)[-2]
+ mask = self.ceph_brx_net.split('/')[1]
+
+ p = self.client_remote.run(args=['route'], stderr=StringIO(),
+ stdout=StringIO(), timeout=(5*60))
+ p = re.findall(r'default .*', p.stdout.getvalue())
+ if not p:
+ raise RuntimeError("No default gw found")
+ gw = p[0].split()[7]
+ self.run_shell_payload(f"""
+ set -e
+ sudo iptables -D FORWARD -o {gw} -i ceph-brx -j ACCEPT
+ sudo iptables -D FORWARD -i {gw} -o ceph-brx -j ACCEPT
+ sudo iptables -t nat -D POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE
+ """, timeout=(5*60), omit_sudo=False, cwd='/')
+
+ def setup_netns(self):
+ """
+ Setup the netns for the mountpoint.
+ """
+ log.info("Setting the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+ self._setup_brx_and_nat()
+ self._setup_netns()
+
+ def cleanup_netns(self):
+ """
+ Cleanup the netns for the mountpoint.
+ """
+ # We defer cleaning the netnses and the bridge until the last
+ # mountpoint is unmounted; this is a temporary workaround
+ # for issue #46282.
+
+ # log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+ # self._cleanup_netns()
+ # self._cleanup_brx_and_nat()
+
+ def suspend_netns(self):
+ """
+ Suspend the netns veth interface.
+ """
+ if self.nsid == -1:
+ return
+
+ log.info("Suspending the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+
+ args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'down']
+ self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False)
+
+ def resume_netns(self):
+ """
+ Resume the netns veth interface.
+ """
+ if self.nsid == -1:
+ return
+
+ log.info("Resuming the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint))
+
+ args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'up']
+ self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False)
+
+ def mount(self, mntopts=[], check_status=True, **kwargs):
+ """
+ kwargs expects its members to be same as the arguments accepted by
+ self.update_attrs().
+ """
+ raise NotImplementedError()
+
+ def mount_wait(self, **kwargs):
+ """
+ Accepts arguments same as self.mount().
+ """
+ self.mount(**kwargs)
+ self.wait_until_mounted()
+
+ def _run_umount_lf(self):
+ log.debug(f'Force/lazy unmounting on client.{self.client_id}')
+
+ try:
+ proc = self.client_remote.run(
+ args=f'sudo umount --lazy --force {self.hostfs_mntpt}',
+ timeout=UMOUNT_TIMEOUT, omit_sudo=False)
+ except CommandFailedError:
+ if self.is_mounted():
+ raise
+
+ return proc
+
+ def umount(self):
+ raise NotImplementedError()
+
+ def umount_wait(self, force=False, require_clean=False,
+ timeout=UMOUNT_TIMEOUT):
+ """
+
+ :param force: Expect that the mount will not shutdown cleanly: kill
+ it hard.
+ :param require_clean: Wait for the Ceph client associated with the
+ mount (e.g. ceph-fuse) to terminate, and
+ raise if it doesn't do so cleanly.
+ :param timeout: amount of time to be waited for umount command to finish
+ :return:
+ """
+ raise NotImplementedError()
+
+ def _verify_attrs(self, **kwargs):
+ """
+ Verify that client_id, client_keyring_path, client_remote, hostfs_mntpt,
+ cephfs_name, cephfs_mntpt are either type str or None.
+ """
+ for k, v in kwargs.items():
+ if v is not None and not isinstance(v, str):
+ raise RuntimeError('value of attributes should be either str '
+ f'or None. {k} - {v}')
+
+ def update_attrs(self, client_id=None, client_keyring_path=None,
+ client_remote=None, hostfs_mntpt=None, cephfs_name=None,
+ cephfs_mntpt=None):
+ if not (client_id or client_keyring_path or client_remote or
+ cephfs_name or cephfs_mntpt or hostfs_mntpt):
+ return
+
+ self._verify_attrs(client_id=client_id,
+ client_keyring_path=client_keyring_path,
+ hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name,
+ cephfs_mntpt=cephfs_mntpt)
+
+ if client_id:
+ self.client_id = client_id
+ if client_keyring_path:
+ self.client_keyring_path = client_keyring_path
+ if client_remote:
+ self.client_remote = client_remote
+ if hostfs_mntpt:
+ self.hostfs_mntpt = hostfs_mntpt
+ if cephfs_name:
+ self.cephfs_name = cephfs_name
+ if cephfs_mntpt:
+ self.cephfs_mntpt = cephfs_mntpt
+
+ def remount(self, **kwargs):
+ """
+ Update the mount object's attributes and attempt to remount with
+ the new values for these attributes.
+
+ 1. Run umount_wait().
+ 2. Run update_attrs().
+ 3. Run mount().
+
+ Accepts the arguments of self.mount() and self.update_attrs(), with
+ one exception: 'wait' is also accepted and can be True or False.
+ """
+ self.umount_wait()
+ assert not self.is_mounted()
+
+ mntopts = kwargs.pop('mntopts', [])
+ check_status = kwargs.pop('check_status', True)
+ wait = kwargs.pop('wait', True)
+
+ self.update_attrs(**kwargs)
+
+ retval = self.mount(mntopts=mntopts, check_status=check_status)
+ # avoid this scenario (again): the mount command might have failed
+ # with check_status silencing the exception; don't wait for a mount
+ # that never happened.
+ if retval is None and wait:
+ self.wait_until_mounted()
+
+ return retval
+
+ def kill(self):
+ """
+ Suspend the netns veth interface to make the client disconnected
+ from the ceph cluster
+ """
+ log.info('Killing connection on {0}...'.format(self.client_remote.name))
+ self.suspend_netns()
+
+ def kill_cleanup(self):
+ """
+ Follow up ``kill`` to get to a clean unmounted state.
+ """
+ log.info('Cleaning up killed connection on {0}'.format(self.client_remote.name))
+ self.umount_wait(force=True)
+
+ def cleanup(self):
+ """
+ Remove the mount point.
+
+ Prerequisite: the client is not mounted.
+ """
+ log.info('Cleaning up mount {0}'.format(self.client_remote.name))
+ stderr = StringIO()
+ try:
+ self.client_remote.run(args=['rmdir', '--', self.mountpoint],
+ cwd=self.test_dir, stderr=stderr,
+ timeout=(60*5), check_status=False)
+ except CommandFailedError:
+ if "no such file or directory" not in stderr.getvalue().lower():
+ raise
+
+ self.cleanup_netns()
+
+ def wait_until_mounted(self):
+ raise NotImplementedError()
+
+ def get_keyring_path(self):
+ # N.B.: default keyring is /etc/ceph/ceph.keyring; see ceph.py and generate_caps
+ return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id)
+
+ def get_key_from_keyfile(self):
+ # XXX: don't call run_shell(), since CephFS might be unmounted.
+ keyring = self.client_remote.read_file(self.client_keyring_path).\
+ decode()
+
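+ # a Ceph keyring file contains a line of the form 'key = <base64 secret>';
+ # return everything after the '='.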
+ for line in keyring.split('\n'):
+ if line.find('key') != -1:
+ return line[line.find('=') + 1 : ].strip()
+
+ raise RuntimeError('Key not found in keyring file '
+ f'{self.client_keyring_path}. Its contents are -\n'
+ f'{keyring}')
+
+ @property
+ def config_path(self):
+ """
+ Path to ceph.conf: override this if you're not a normal systemwide ceph install
+ :return: str
+ """
+ return "/etc/ceph/ceph.conf"
+
+ @contextmanager
+ def mounted_wait(self):
+ """
+ A context manager: starting from an unmounted state, mount the
+ client, yield, and then unmount it on exit.
+ """
+ self.mount()
+ self.wait_until_mounted()
+ try:
+ yield
+ finally:
+ self.umount_wait()
+
+ def create_file(self, filename='testfile', dirname=None, user=None,
+ check_status=True):
+ assert(self.is_mounted())
+
+ if not os.path.isabs(filename):
+ if dirname:
+ if os.path.isabs(dirname):
+ path = os.path.join(dirname, filename)
+ else:
+ path = os.path.join(self.hostfs_mntpt, dirname, filename)
+ else:
+ path = os.path.join(self.hostfs_mntpt, filename)
+ else:
+ path = filename
+
+ if user:
+ args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', 'touch ' + path]
+ else:
+ args = 'touch ' + path
+
+ return self.client_remote.run(args=args, check_status=check_status)
+
+ def create_files(self):
+ assert(self.is_mounted())
+
+ for suffix in self.test_files:
+ log.info("Creating file {0}".format(suffix))
+ self.client_remote.run(args=[
+ 'touch', os.path.join(self.hostfs_mntpt, suffix)
+ ])
+
+ def test_create_file(self, filename='testfile', dirname=None, user=None,
+ check_status=True):
+ return self.create_file(filename=filename, dirname=dirname, user=user,
+ check_status=False)
+
+ def check_files(self):
+ assert(self.is_mounted())
+
+ for suffix in self.test_files:
+ log.info("Checking file {0}".format(suffix))
+ r = self.client_remote.run(args=[
+ 'ls', os.path.join(self.hostfs_mntpt, suffix)
+ ], check_status=False)
+ if r.exitstatus != 0:
+ raise RuntimeError("Expected file {0} not found".format(suffix))
+
+ def write_file(self, path, data, perms=None):
+ """
+ Write the given data at the given path and set the given perms to the
+ file on the path.
+ """
+ if path.find(self.hostfs_mntpt) == -1:
+ path = os.path.join(self.hostfs_mntpt, path)
+
+ write_file(self.client_remote, path, data)
+
+ if perms:
+ self.run_shell(args=f'chmod {perms} {path}')
+
+ def read_file(self, path):
+ """
+ Return the data from the file on given path.
+ """
+ if path.find(self.hostfs_mntpt) == -1:
+ path = os.path.join(self.hostfs_mntpt, path)
+
+ return self.run_shell(args=['cat', path]).\
+ stdout.getvalue().strip()
+
+ def create_destroy(self):
+ assert(self.is_mounted())
+
+ filename = "{0} {1}".format(datetime.datetime.now(), self.client_id)
+ log.debug("Creating test file {0}".format(filename))
+ self.client_remote.run(args=[
+ 'touch', os.path.join(self.hostfs_mntpt, filename)
+ ])
+ log.debug("Deleting test file {0}".format(filename))
+ self.client_remote.run(args=[
+ 'rm', '-f', os.path.join(self.hostfs_mntpt, filename)
+ ])
+
+ def _run_python(self, pyscript, py_version='python3', sudo=False):
+ args, omit_sudo = [], True
+ if sudo:
+ args.append('sudo')
+ omit_sudo = False
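+ # 'daemon-helper kill' terminates the spawned interpreter when its
+ # stdin is closed (see _kill_background()), so background scripts
+ # don't outlive the test.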
+ args += ['adjust-ulimits', 'daemon-helper', 'kill', py_version, '-c', pyscript]
+ return self.client_remote.run(args=args, wait=False, stdin=run.PIPE,
+ stdout=StringIO(), omit_sudo=omit_sudo)
+
+ def run_python(self, pyscript, py_version='python3', sudo=False):
+ p = self._run_python(pyscript, py_version, sudo=sudo)
+ p.wait()
+ return p.stdout.getvalue().strip()
+
+ def run_shell(self, args, timeout=300, **kwargs):
+ omit_sudo = kwargs.pop('omit_sudo', False)
+ cwd = kwargs.pop('cwd', self.mountpoint)
+ stdout = kwargs.pop('stdout', StringIO())
+ stderr = kwargs.pop('stderr', StringIO())
+
+ return self.client_remote.run(args=args, cwd=cwd, timeout=timeout,
+ stdout=stdout, stderr=stderr,
+ omit_sudo=omit_sudo, **kwargs)
+
+ def run_shell_payload(self, payload, **kwargs):
+ kwargs['args'] = ["bash", "-c", Raw(f"'{payload}'")]
+ if kwargs.pop('sudo', False):
+ kwargs['args'].insert(0, 'sudo')
+ kwargs['omit_sudo'] = False
+ return self.run_shell(**kwargs)
+
+ def run_as_user(self, **kwargs):
+ """
+ Besides the arguments defined for run_shell() this method also
+ accepts argument 'user'.
+ """
+ args = kwargs.pop('args')
+ user = kwargs.pop('user')
+ if isinstance(args, str):
+ args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', args]
+ elif isinstance(args, list):
+ # join the list into a single command string for 'bash -c'
+ cmd = ' '.join(args)
+
+ args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', cmd]
+
+ kwargs['args'] = args
+ kwargs['omit_sudo'] = False
+ return self.run_shell(**kwargs)
+
+ def run_as_root(self, **kwargs):
+ """
+ Accepts same arguments as run_shell().
+ """
+ kwargs['user'] = 'root'
+ return self.run_as_user(**kwargs)
+
+ def assert_retval(self, proc_retval, exp_retval):
+ msg = (f'expected return value: {exp_retval}\n'
+ f'received return value: {proc_retval}\n')
+ assert proc_retval == exp_retval, msg
+
+ def _verify(self, proc, exp_retval=None, exp_errmsgs=None):
+ if exp_retval is None and exp_errmsgs is None:
+ raise RuntimeError('Method didn\'t get enough parameters. Pass '
+ 'return value or error message expected from '
+ 'the command/process.')
+
+ if exp_retval is not None:
+ self.assert_retval(proc.returncode, exp_retval)
+ if exp_errmsgs is None:
+ return
+
+ if isinstance(exp_errmsgs, str):
+ exp_errmsgs = (exp_errmsgs, )
+
+ proc_stderr = proc.stderr.getvalue().lower()
+ msg = ('didn\'t find any of the expected strings in stderr.\n'
+ f'expected strings: {exp_errmsgs}\n'
+ f'received error message: {proc_stderr}\n'
+ 'note: received error message is converted to lowercase')
+ for e in exp_errmsgs:
+ if e in proc_stderr:
+ break
+ # this 'else' belongs to the for loop: it runs only if none of the expected messages were found.
+ else:
+ assert False, msg
+
+ def negtestcmd(self, args, retval=None, errmsgs=None, stdin=None,
+ cwd=None, wait=True):
+ """
+ Conduct a negative test for the given command.
+
+ retval and errmsgs are parameters to confirm the cause of command
+ failure.
+
+ Note: errmsgs is expected to be a tuple, but if there is only one
+ error message it can also be passed as a string; this method handles
+ that internally.
+ """
+ proc = self.run_shell(args=args, wait=wait, stdin=stdin, cwd=cwd,
+ check_status=False)
+ self._verify(proc, retval, errmsgs)
+ return proc
+
+ def negtestcmd_as_user(self, args, user, retval=None, errmsgs=None,
+ stdin=None, cwd=None, wait=True):
+ proc = self.run_as_user(args=args, user=user, wait=wait, stdin=stdin,
+ cwd=cwd, check_status=False)
+ self._verify(proc, retval, errmsgs)
+ return proc
+
+ def negtestcmd_as_root(self, args, retval=None, errmsgs=None, stdin=None,
+ cwd=None, wait=True):
+ proc = self.run_as_root(args=args, wait=wait, stdin=stdin, cwd=cwd,
+ check_status=False)
+ self._verify(proc, retval, errmsgs)
+ return proc
+
+ def open_for_reading(self, basename):
+ """
+ Open a file for reading only.
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ return self._run_python(dedent(
+ """
+ import os
+ mode = os.O_RDONLY
+ fd = os.open("{path}", mode)
+ os.close(fd)
+ """.format(path=path)
+ ))
+
+ def open_for_writing(self, basename, creat=True, trunc=True, excl=False):
+ """
+ Open a file for writing only.
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ return self._run_python(dedent(
+ """
+ import os
+ mode = os.O_WRONLY
+ if {creat}:
+ mode |= os.O_CREAT
+ if {trunc}:
+ mode |= os.O_TRUNC
+ if {excl}:
+ mode |= os.O_EXCL
+ fd = os.open("{path}", mode)
+ os.close(fd)
+ """.format(path=path, creat=creat, trunc=trunc, excl=excl)
+ ))
+
+ def open_no_data(self, basename):
+ """
+ A pure metadata operation
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ p = self._run_python(dedent(
+ """
+ f = open("{path}", 'w')
+ """.format(path=path)
+ ))
+ p.wait()
+
+ def open_background(self, basename="background_file", write=True, content="content"):
+ """
+ Open a file for writing, then block such that the client
+ will hold a capability.
+
+ Don't return until the remote process has got as far as opening
+ the file, then return the RemoteProcess instance.
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ if write:
+ pyscript = dedent("""
+ import time
+
+ with open("{path}", 'w') as f:
+ f.write("{content}")
+ f.flush()
+ while True:
+ time.sleep(1)
+ """).format(path=path, content=content)
+ else:
+ pyscript = dedent("""
+ import time
+
+ with open("{path}", 'r') as f:
+ while True:
+ time.sleep(1)
+ """).format(path=path)
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+
+ # This wait would not be sufficient if the file had already
+ # existed, but it's simple and in practice users of open_background
+ # are not using it on existing files.
+ if write:
+ self.wait_for_visible(basename, size=len(content))
+ else:
+ self.wait_for_visible(basename)
+
+ return rproc
+
+ def open_dir_background(self, basename):
+ """
+ Create and hold a capability to a directory.
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ pyscript = dedent("""
+ import time
+ import os
+
+ os.mkdir("{path}")
+ fd = os.open("{path}", os.O_RDONLY)
+ while True:
+ time.sleep(1)
+ """).format(path=path)
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+
+ self.wait_for_visible(basename)
+
+ return rproc
+
+ def wait_for_dir_empty(self, dirname, timeout=30):
+ dirpath = os.path.join(self.hostfs_mntpt, dirname)
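+ # an empty directory has a link count of 2 ('.' plus its entry in the
+ # parent); subdirectories would raise the count above that.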
+ with safe_while(sleep=5, tries=(timeout//5)) as proceed:
+ while proceed():
+ p = self.run_shell_payload(f"stat -c %h {dirpath}")
+ nr_links = int(p.stdout.getvalue().strip())
+ if nr_links == 2:
+ return
+
+ def wait_for_visible(self, basename="background_file", size=None, timeout=30):
+ i = 0
+ args = ['stat']
+ if size is not None:
+ args += ['--printf=%s']
+ args += [os.path.join(self.hostfs_mntpt, basename)]
+ while i < timeout:
+ p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False)
+ if p.exitstatus == 0:
+ if size is not None:
+ s = p.stdout.getvalue().strip()
+ if int(s) == size:
+ log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s")
+ return
+ else:
+ log.error(f"File {basename} became visible but with size {int(s)} not {size}")
+ else:
+ log.info(f"File {basename} became visible from {self.client_id} after {i}s")
+ return
+ time.sleep(1)
+ i += 1
+
+ raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format(
+ i, basename, self.client_id))
+
+ def lock_background(self, basename="background_file", do_flock=True):
+ """
+ Open and lock files for writing, holding the locks in a background process
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ script_builder = """
+ import time
+ import fcntl
+ import struct"""
+ if do_flock:
+ script_builder += """
+ f1 = open("{path}-1", 'w')
+ fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)"""
+ script_builder += """
+ f2 = open("{path}-2", 'w')
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ while True:
+ time.sleep(1)
+ """
+
+ pyscript = dedent(script_builder).format(path=path)
+
+ log.info("lock_background file {0}".format(basename))
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def lock_and_release(self, basename="background_file"):
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ script = """
+ import time
+ import fcntl
+ import struct
+ f1 = open("{path}-1", 'w')
+ fcntl.flock(f1, fcntl.LOCK_EX)
+ f2 = open("{path}-2", 'w')
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ """
+ pyscript = dedent(script).format(path=path)
+
+ log.info("lock_and_release file {0}".format(basename))
+ return self._run_python(pyscript)
+
+ def check_filelock(self, basename="background_file", do_flock=True):
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ script_builder = """
+ import fcntl
+ import errno
+ import struct"""
+ if do_flock:
+ script_builder += """
+ f1 = open("{path}-1", 'r')
+ try:
+ fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as e:
+ if e.errno == errno.EAGAIN:
+ pass
+ else:
+ raise RuntimeError("flock on file {path}-1 not found")"""
+ script_builder += """
+ f2 = open("{path}-2", 'r')
+ try:
+ lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
+ fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)
+ except IOError as e:
+ if e.errno == errno.EAGAIN:
+ pass
+ else:
+ raise RuntimeError("posix lock on file {path}-2 not found")
+ """
+ pyscript = dedent(script_builder).format(path=path)
+
+ log.info("check lock on file {0}".format(basename))
+ self.client_remote.run(args=[
+ 'python3', '-c', pyscript
+ ])
+
+ def write_background(self, basename="background_file", loop=False):
+ """
+ Open a file and write to it from a background process. With
+ loop=False the file is written once; with loop=True it is re-written
+ every second until the process is killed.
+ :param basename: name of the file under the mountpoint
+ :return: the background RemoteProcess
+ """
+ assert(self.is_mounted())
+
+ path = os.path.join(self.hostfs_mntpt, basename)
+
+ pyscript = dedent("""
+ import os
+ import time
+
+ fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0o644)
+ try:
+ while True:
+ os.write(fd, b'content')
+ time.sleep(1)
+ if not {loop}:
+ break
+ except IOError as e:
+ pass
+ os.close(fd)
+ """).format(path=path, loop=str(loop))
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def write_n_mb(self, filename, n_mb, seek=0, wait=True):
+ """
+ Write the requested number of megabytes to a file
+ """
+ assert(self.is_mounted())
+
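+ # dd with conv=fdatasync calls fdatasync() on the file before exiting,
+ # so the written data has been flushed when this command returns.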
+ return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename),
+ "bs=1M", "conv=fdatasync",
+ "count={0}".format(int(n_mb)),
+ "seek={0}".format(int(seek))
+ ], wait=wait)
+
+ def write_test_pattern(self, filename, size):
+ log.info("Writing {0} bytes to {1}".format(size, filename))
+ return self.run_python(dedent("""
+ import zlib
+ path = "{path}"
+ with open(path, 'w') as f:
+ for i in range(0, {size}):
+ val = zlib.crc32(str(i).encode('utf-8')) & 7
+ f.write(chr(val))
+ """.format(
+ path=os.path.join(self.hostfs_mntpt, filename),
+ size=size
+ )))
+
+ def validate_test_pattern(self, filename, size):
+ log.info("Validating {0} bytes from {1}".format(size, filename))
+ # Use sudo because cephfs-data-scan may recreate the file with owner==root
+ return self.run_python(dedent("""
+ import zlib
+ path = "{path}"
+ with open(path, 'r') as f:
+ bytes = f.read()
+ if len(bytes) != {size}:
+ raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format(
+ len(bytes), {size}
+ ))
+ for i, b in enumerate(bytes):
+ val = zlib.crc32(str(i).encode('utf-8')) & 7
+ if b != chr(val):
+ raise RuntimeError("Bad data at offset {{0}}".format(i))
+ """.format(
+ path=os.path.join(self.hostfs_mntpt, filename),
+ size=size
+ )), sudo=True)
+
+ def open_n_background(self, fs_path, count):
+ """
+ Open N files for writing, hold them open in a background process
+
+ :param fs_path: Path relative to CephFS root, e.g. "foo/bar"
+ :return: a RemoteProcess
+ """
+ assert(self.is_mounted())
+
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+
+ pyscript = dedent("""
+ import sys
+ import time
+ import os
+
+ n = {count}
+ abs_path = "{abs_path}"
+
+ if not os.path.exists(abs_path):
+ os.makedirs(abs_path)
+
+ handles = []
+ for i in range(0, n):
+ fname = "file_"+str(i)
+ path = os.path.join(abs_path, fname)
+ handles.append(open(path, 'w'))
+
+ while True:
+ time.sleep(1)
+ """).format(abs_path=abs_path, count=count)
+
+ rproc = self._run_python(pyscript)
+ self.background_procs.append(rproc)
+ return rproc
+
+ def create_n_files(self, fs_path, count, sync=False, dirsync=False,
+ unlink=False, finaldirsync=False, hard_links=0):
+ """
+ Create n files.
+
+ :param sync: sync the file after writing
+ :param dirsync: sync the containing directory after closing the file
+ :param unlink: unlink the file after closing
+ :param finaldirsync: sync the containing directory after closing the last file
+ :param hard_links: create given number of hard link(s) for each file
+ """
+
+ assert(self.is_mounted())
+
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+
+ pyscript = dedent(f"""
+ import os
+ import uuid
+
+ n = {count}
+ create_hard_links = False
+ if {hard_links} > 0:
+ create_hard_links = True
+ path = "{abs_path}"
+
+ dpath = os.path.dirname(path)
+ fnameprefix = os.path.basename(path)
+ os.makedirs(dpath, exist_ok=True)
+
+ try:
+ dirfd = os.open(dpath, os.O_DIRECTORY)
+
+ for i in range(n):
+ fpath = os.path.join(dpath, f"{{fnameprefix}}_{{i}}")
+ with open(fpath, 'w') as f:
+ f.write(f"{{i}}")
+ if {sync}:
+ f.flush()
+ os.fsync(f.fileno())
+ if {unlink}:
+ os.unlink(fpath)
+ if {dirsync}:
+ os.fsync(dirfd)
+ if create_hard_links:
+ for j in range({hard_links}):
+ os.system(f"ln {{fpath}} {{dpath}}/{{fnameprefix}}_{{i}}_{{uuid.uuid4()}}")
+ if {finaldirsync}:
+ os.fsync(dirfd)
+ finally:
+ os.close(dirfd)
+ """)
+
+ self.run_python(pyscript)
+
+ def teardown(self):
+ for p in self.background_procs:
+ log.info("Terminating background process")
+ self._kill_background(p)
+
+ self.background_procs = []
+
+ def _kill_background(self, p):
+ if p.stdin:
+ p.stdin.close()
+ try:
+ p.wait()
+ except (CommandFailedError, ConnectionLostError):
+ pass
+
+ def kill_background(self, p):
+ """
+ For a process that was returned by one of the _background member functions,
+ kill it hard.
+ """
+ self._kill_background(p)
+ self.background_procs.remove(p)
+
+ def send_signal(self, signal):
+ signal = signal.lower()
+ if signal not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']:
+ raise NotImplementedError
+
+ self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal),
+ self.client_pid], omit_sudo=False)
+
+ def get_global_id(self):
+ raise NotImplementedError()
+
+ def get_global_inst(self):
+ raise NotImplementedError()
+
+ def get_global_addr(self):
+ raise NotImplementedError()
+
+ def get_osd_epoch(self):
+ raise NotImplementedError()
+
+ def get_op_read_count(self):
+ raise NotImplementedError()
+
+ def readlink(self, fs_path):
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+
+ pyscript = dedent("""
+ import os
+
+ print(os.readlink("{path}"))
+ """).format(path=abs_path)
+
+ proc = self._run_python(pyscript)
+ proc.wait()
+ return str(proc.stdout.getvalue().strip())
+
+
+ def lstat(self, fs_path, follow_symlinks=False, wait=True):
+ return self.stat(fs_path, follow_symlinks=False, wait=wait)
+
+ def stat(self, fs_path, follow_symlinks=True, wait=True, **kwargs):
+ """
+ stat a file, and return the result as a dictionary like this:
+ {
+ "st_ctime": 1414161137.0,
+ "st_mtime": 1414161137.0,
+ "st_nlink": 33,
+ "st_gid": 0,
+ "st_dev": 16777218,
+ "st_size": 1190,
+ "st_ino": 2,
+ "st_uid": 0,
+ "st_mode": 16877,
+ "st_atime": 1431520593.0
+ }
+
+ Raises exception on absent file.
+ """
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+ if follow_symlinks:
+ stat_call = "os.stat('" + abs_path + "')"
+ else:
+ stat_call = "os.lstat('" + abs_path + "')"
+
+ pyscript = dedent("""
+ import os
+ import stat
+ import json
+ import sys
+
+ try:
+ s = {stat_call}
+ except OSError as e:
+ sys.exit(e.errno)
+
+ attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"]
+ print(json.dumps(
+ dict([(a, getattr(s, a)) for a in attrs]),
+ indent=2))
+ """).format(stat_call=stat_call)
+ proc = self._run_python(pyscript, **kwargs)
+ if wait:
+ proc.wait()
+ return json.loads(proc.stdout.getvalue().strip())
+ else:
+ return proc
+
+ def touch(self, fs_path):
+ """
+ Create a dentry if it doesn't already exist. This python
+ implementation exists because the usual command line tool doesn't
+ pass through error codes like EIO.
+
+ :param fs_path:
+ :return:
+ """
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+ pyscript = dedent("""
+ import sys
+ import errno
+
+ try:
+ f = open("{path}", "w")
+ f.close()
+ except IOError as e:
+ sys.exit(errno.EIO)
+ """).format(path=abs_path)
+ proc = self._run_python(pyscript)
+ proc.wait()
+
+ def path_to_ino(self, fs_path, follow_symlinks=True):
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+
+ if follow_symlinks:
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.stat("{path}").st_ino)
+ """).format(path=abs_path)
+ else:
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.lstat("{path}").st_ino)
+ """).format(path=abs_path)
+
+ proc = self._run_python(pyscript)
+ proc.wait()
+ return int(proc.stdout.getvalue().strip())
+
+ def path_to_nlink(self, fs_path):
+ abs_path = os.path.join(self.hostfs_mntpt, fs_path)
+
+ pyscript = dedent("""
+ import os
+ import stat
+
+ print(os.stat("{path}").st_nlink)
+ """).format(path=abs_path)
+
+ proc = self._run_python(pyscript)
+ proc.wait()
+ return int(proc.stdout.getvalue().strip())
+
+ def ls(self, path=None, **kwargs):
+ """
+ Wrap ls: return a list of strings
+ """
+ kwargs['args'] = ["ls"]
+ if path:
+ kwargs['args'].append(path)
+ if kwargs.pop('sudo', False):
+ kwargs['args'].insert(0, 'sudo')
+ kwargs['omit_sudo'] = False
+ ls_text = self.run_shell(**kwargs).stdout.getvalue().strip()
+
+ if ls_text:
+ return ls_text.split("\n")
+ else:
+ # Special case because otherwise split on empty string
+ # gives you [''] instead of []
+ return []
+
+ def setfattr(self, path, key, val, **kwargs):
+ """
+ Wrap setfattr.
+
+ :param path: relative to mount point
+ :param key: xattr name
+ :param val: xattr value
+ :return: None
+ """
+ kwargs['args'] = ["setfattr", "-n", key, "-v", val, path]
+ if kwargs.pop('sudo', False):
+ kwargs['args'].insert(0, 'sudo')
+ kwargs['omit_sudo'] = False
+ self.run_shell(**kwargs)
+
+ def getfattr(self, path, attr, **kwargs):
+ """
+ Wrap getfattr: return the value of a named xattr on one file, or
+ None if the attribute is not found.
+
+ :return: a string
+ """
+ kwargs['args'] = ["getfattr", "--only-values", "-n", attr, path]
+ if kwargs.pop('sudo', False):
+ kwargs['args'].insert(0, 'sudo')
+ kwargs['omit_sudo'] = False
+ kwargs['wait'] = False
+ p = self.run_shell(**kwargs)
+ try:
+ p.wait()
+ except CommandFailedError as e:
+ if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue():
+ return None
+ else:
+ raise
+
+ return str(p.stdout.getvalue())
+
+ def df(self):
+ """
+ Wrap df: return a dict of usage fields in bytes
+ """
+
+ p = self.run_shell(["df", "-B1", "."])
+ lines = p.stdout.getvalue().strip().split("\n")
+ fs, total, used, avail = lines[1].split()[:4]
+ log.warning(lines)
+
+ return {
+ "total": int(total),
+ "used": int(used),
+ "available": int(avail)
+ }
+
+ def dir_checksum(self, path=None, follow_symlinks=False):
+ cmd = ["find"]
+ if follow_symlinks:
+ cmd.append("-L")
+ if path:
+ cmd.append(path)
+ cmd.extend(["-type", "f", "-exec", "md5sum", "{}", "+"])
+ checksum_text = self.run_shell(cmd).stdout.getvalue().strip()
+ checksum_sorted = sorted(checksum_text.split('\n'), key=lambda v: v.split()[1])
+ return hashlib.md5(('\n'.join(checksum_sorted)).encode('utf-8')).hexdigest()
+
+ def validate_subvol_options(self):
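+ # If the job's client config sets 'mount_subvol_num', mount one of the
+ # subvolumes pre-created by the test (ctx.created_subvols) instead of
+ # the path given by cephfs_mntpt.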
+ mount_subvol_num = self.client_config.get('mount_subvol_num', None)
+ if self.cephfs_mntpt and mount_subvol_num is not None:
+ log.warning("You cannot specify both: cephfs_mntpt and mount_subvol_num")
+ log.info(f"Mounting subvol {mount_subvol_num} for now")
+
+ if mount_subvol_num is not None:
+ # mount_subvol must be an index into the subvol path array for the fs
+ if not self.cephfs_name:
+ self.cephfs_name = 'cephfs'
+ assert(hasattr(self.ctx, "created_subvols"))
+ # mount_subvol must be specified under client.[0-9] yaml section
+ subvol_paths = self.ctx.created_subvols[self.cephfs_name]
+ path_to_mount = subvol_paths[mount_subvol_num]
+ self.cephfs_mntpt = path_to_mount
diff --git a/qa/tasks/cephfs/test_acls.py b/qa/tasks/cephfs/test_acls.py
new file mode 100644
index 000000000..48160dd8b
--- /dev/null
+++ b/qa/tasks/cephfs/test_acls.py
@@ -0,0 +1,39 @@
+from logging import getLogger
+
+from io import StringIO
+from tasks.cephfs.xfstests_dev import XFSTestsDev
+
+
+log = getLogger(__name__)
+
+
+class TestACLs(XFSTestsDev):
+
+ def test_acls(self):
+ from tasks.cephfs.fuse_mount import FuseMount
+ from tasks.cephfs.kernel_mount import KernelMount
+
+ if isinstance(self.mount_a, FuseMount):
+ log.info('client is fuse mounted')
+ elif isinstance(self.mount_a, KernelMount):
+ log.info('client is kernel mounted')
+
+ # XXX: check_status is set to False so that we can check for the
+ # command's failure on our own (since this command doesn't set the
+ # right error code and error message in some cases) and print custom
+ # log messages accordingly.
+ proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0',
+ './check', 'generic/099'], cwd=self.xfstests_repo_path, stdout=StringIO(),
+ stderr=StringIO(), timeout=30, check_status=False, omit_sudo=False,
+ label='running tests for ACLs from xfstests-dev')
+
+ if proc.returncode != 0:
+ log.info('Command failed.')
+ log.info(f'Command return value: {proc.returncode}')
+ stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue()
+ log.info(f'Command stdout -\n{stdout}')
+ log.info(f'Command stderr -\n{stderr}')
+
+ self.assertEqual(proc.returncode, 0)
+ success_line = 'Passed all 1 tests'
+ self.assertIn(success_line, stdout)
diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py
new file mode 100644
index 000000000..9890381c6
--- /dev/null
+++ b/qa/tasks/cephfs/test_admin.py
@@ -0,0 +1,1494 @@
+import errno
+import json
+import logging
+import time
+import uuid
+from io import StringIO
+from os.path import join as os_path_join
+
+from teuthology.exceptions import CommandFailedError
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, classhook
+from tasks.cephfs.filesystem import FileLayout, FSMissing
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.caps_helper import CapTester
+
+log = logging.getLogger(__name__)
+
+class TestAdminCommands(CephFSTestCase):
+ """
+ Tests for administration commands.
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def check_pool_application_metadata_key_value(self, pool, app, key, value):
+ output = self.fs.mon_manager.raw_cluster_cmd(
+ 'osd', 'pool', 'application', 'get', pool, app, key)
+ self.assertEqual(str(output.strip()), value)
+
+ def setup_ec_pools(self, n, metadata=True, overwrites=True):
+ if metadata:
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8")
+ cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"]
+ self.fs.mon_manager.raw_cluster_cmd(*cmd)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile")
+ if overwrites:
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true')
+
+@classhook('_add_valid_tell')
+class TestValidTell(TestAdminCommands):
+ @classmethod
+ def _add_valid_tell(cls):
+ tells = [
+ ['cache', 'status'],
+ ['damage', 'ls'],
+ ['dump_blocked_ops'],
+ ['dump_blocked_ops_count'],
+ ['dump_historic_ops'],
+ ['dump_historic_ops_by_duration'],
+ ['dump_mempools'],
+ ['dump_ops_in_flight'],
+ ['flush', 'journal'],
+ ['get', 'subtrees'],
+ ['ops', 'locks'],
+ ['ops'],
+ ['status'],
+ ['version'],
+ ]
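+ # Generate one test_valid_<command> method per tell listed above; each
+ # runs the tell via self.fs.rank_tell() and checks that the output is
+ # JSON-serializable.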
+ def test(c):
+ def f(self):
+ J = self.fs.rank_tell(c)
+ json.dumps(J)
+ log.debug("dumped:\n%s", str(J))
+ return f
+ for c in tells:
+ setattr(cls, 'test_valid_' + '_'.join(c), test(c))
+
+class TestFsStatus(TestAdminCommands):
+ """
+ Test "ceph fs status subcommand.
+ """
+
+ def test_fs_status(self):
+ """
+ That `ceph fs status` command functions.
+ """
+
+ s = self.fs.mon_manager.raw_cluster_cmd("fs", "status")
+ self.assertTrue("active" in s)
+
+ mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"]
+ self.assertEqual(mdsmap[0]["state"], "active")
+
+ mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"]
+ self.assertEqual(mdsmap[0]["state"], "active")
+
+
+class TestAddDataPool(TestAdminCommands):
+ """
+ Test "ceph fs add_data_pool" subcommand.
+ """
+
+ def test_add_data_pool_root(self):
+ """
+ That a new data pool can be added and used for the root directory.
+ """
+
+ p = self.fs.add_data_pool("foo")
+ self.fs.set_dir_layout(self.mount_a, ".", FileLayout(pool=p))
+
+ def test_add_data_pool_application_metadata(self):
+ """
+ That the application metadata set on a newly added data pool is as expected.
+ """
+ pool_name = "foo"
+ mon_cmd = self.fs.mon_manager.raw_cluster_cmd
+ mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min',
+ str(self.fs.pg_num_min))
+ # Check whether https://tracker.ceph.com/issues/43061 is fixed
+ mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs')
+ self.fs.add_data_pool(pool_name, create=False)
+ self.check_pool_application_metadata_key_value(
+ pool_name, 'cephfs', 'data', self.fs.name)
+
+ def test_add_data_pool_subdir(self):
+ """
+ That a new data pool can be added and used for a sub-directory.
+ """
+
+ p = self.fs.add_data_pool("foo")
+ self.mount_a.run_shell("mkdir subdir")
+ self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p))
+
+ def test_add_data_pool_non_alphamueric_name_as_subdir(self):
+ """
+ That a new data pool with non-alphanumeric name can be added and used for a sub-directory.
+ """
+ p = self.fs.add_data_pool("I-am-data_pool00.")
+ self.mount_a.run_shell("mkdir subdir")
+ self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p))
+
+ def test_add_data_pool_ec(self):
+ """
+ That a new EC data pool can be added.
+ """
+
+ n = "test_add_data_pool_ec"
+ self.setup_ec_pools(n, metadata=False)
+ self.fs.add_data_pool(n+"-data", create=False)
+
+ def test_add_already_in_use_data_pool(self):
+ """
+ That the command to add a data pool fails when the pool is already in use by another fs.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ # create second data pool, metadata pool and add with filesystem
+ second_fs = "second_fs"
+ second_metadata_pool = "second_metadata_pool"
+ second_data_pool = "second_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool)
+
+ # try to add 'first_data_pool' with 'second_fs'
+ # Expecting EINVAL exit status because 'first_data_pool' is already in use with 'first_fs'
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', second_fs, first_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because data pool is already in use as data pool for first_fs")
+
+ def test_add_already_in_use_metadata_pool(self):
+ """
+ That the command to add a data pool fails when the pool is already in use as a metadata pool by another fs.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ # create second data pool, metadata pool and add with filesystem
+ second_fs = "second_fs"
+ second_metadata_pool = "second_metadata_pool"
+ second_data_pool = "second_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool)
+
+ # try to add 'second_metadata_pool' with 'first_fs' as a data pool
+ # Expecting EINVAL exit status because 'second_metadata_pool'
+ # is already in use with 'second_fs' as a metadata pool
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', first_fs, second_metadata_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because data pool is already in use as metadata pool for 'second_fs'")
+
+class TestFsNew(TestAdminCommands):
+ """
+ Test "ceph fs new" subcommand.
+ """
+ MDSS_REQUIRED = 3
+
+ def test_fsnames_can_only_by_goodchars(self):
+ n = 'test_fsnames_can_only_by_goodchars'
+ metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool'
+ badname = n+'badname@#'
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ n+metapoolname)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ n+datapoolname)
+
+ # test that fsname not with "goodchars" fails
+ args = ['fs', 'new', badname, metapoolname, datapoolname]
+ proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(),
+ check_status=False)
+ self.assertIn('invalid chars', proc.stderr.getvalue().lower())
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname,
+ metapoolname,
+ '--yes-i-really-really-mean-it-not-faking')
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname,
+ datapoolname,
+ '--yes-i-really-really-mean-it-not-faking')
+
+ def test_new_default_ec(self):
+ """
+ That a new file system warns/fails with an EC default data pool.
+ """
+
+ self.mount_a.umount_wait(require_clean=True)
+ self.mds_cluster.delete_all_filesystems()
+ n = "test_new_default_ec"
+ self.setup_ec_pools(n)
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+
+ def test_new_default_ec_force(self):
+ """
+ That a new file system succeeds with an EC default data pool with --force.
+ """
+
+ self.mount_a.umount_wait(require_clean=True)
+ self.mds_cluster.delete_all_filesystems()
+ n = "test_new_default_ec_force"
+ self.setup_ec_pools(n)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force")
+
+ def test_new_default_ec_no_overwrite(self):
+ """
+ That a new file system fails with an EC default data pool without overwrite.
+ """
+
+ self.mount_a.umount_wait(require_clean=True)
+ self.mds_cluster.delete_all_filesystems()
+ n = "test_new_default_ec_no_overwrite"
+ self.setup_ec_pools(n, overwrites=False)
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+ # and even with --force !
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force")
+ except CommandFailedError as e:
+ if e.exitstatus == 22:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+
+ def test_fs_new_pool_application_metadata(self):
+ """
+ That the application metadata set on the pools of a newly created filesystem are as expected.
+ """
+ self.mount_a.umount_wait(require_clean=True)
+ self.mds_cluster.delete_all_filesystems()
+ fs_name = "test_fs_new_pool_application"
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ mon_cmd = self.fs.mon_manager.raw_cluster_cmd
+ for p in pool_names:
+ mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min))
+ mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs')
+ mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1])
+ for i in range(2):
+ self.check_pool_application_metadata_key_value(
+ pool_names[i], 'cephfs', keys[i], fs_name)
+
+ def test_fs_new_with_specific_id(self):
+ """
+ That a file system can be created with a specific ID.
+ """
+ fs_name = "test_fs_specific_id"
+ fscid = 100
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ for p in pool_names:
+ self.run_cluster_cmd(f'osd pool create {p}')
+ self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force')
+ self.fs.status().get_fsmap(fscid)
+ for i in range(2):
+ self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name)
+
+ def test_fs_new_with_specific_id_idempotency(self):
+ """
+ That command to create file system with specific ID is idempotent.
+ """
+ fs_name = "test_fs_specific_id"
+ fscid = 100
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ for p in pool_names:
+ self.run_cluster_cmd(f'osd pool create {p}')
+ self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force')
+ self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force')
+ self.fs.status().get_fsmap(fscid)
+
+ def test_fs_new_with_specific_id_fails_without_force_flag(self):
+ """
+ That command to create file system with specific ID fails without '--force' flag.
+ """
+ fs_name = "test_fs_specific_id"
+ fscid = 100
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ for p in pool_names:
+ self.run_cluster_cmd(f'osd pool create {p}')
+ try:
+ self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}')
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on creating a file system with specifc ID without --force flag")
+ else:
+ self.fail("expected creating file system with specific ID without '--force' flag to fail")
+
+ def test_fs_new_with_specific_id_fails_already_in_use(self):
+ """
+ That creating file system with ID already in use fails.
+ """
+ fs_name = "test_fs_specific_id"
+ # file system ID already in use
+ fscid = self.fs.status().map['filesystems'][0]['id']
+ keys = ['metadata', 'data']
+ pool_names = [fs_name+'-'+key for key in keys]
+ for p in pool_names:
+ self.run_cluster_cmd(f'osd pool create {p}')
+ try:
+ self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force')
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on creating a file system with specifc ID that is already in use")
+ else:
+ self.fail("expected creating file system with ID already in use to fail")
+
+ def test_fs_new_metadata_pool_already_in_use(self):
+ """
+ That creating a file system with a metadata pool that is already in use fails.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ second_fs = "second_fs"
+ second_data_pool = "second_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool)
+
+ # try to create new fs 'second_fs' with following configuration
+ # metadata pool -> 'first_metadata_pool'
+ # data pool -> 'second_data_pool'
+ # Expecting EINVAL exit status because 'first_metadata_pool'
+ # is already in use with 'first_fs'
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata pool is already in use for 'first_fs'")
+
+ def test_fs_new_data_pool_already_in_use(self):
+ """
+ That creating a file system with a data pool that is already in use fails.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ second_fs = "second_fs"
+ second_metadata_pool = "second_metadata_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool)
+
+ # try to create new fs 'second_fs' with following configuration
+ # metadata pool -> 'second_metadata_pool'
+ # data pool -> 'first_data_pool'
+ # Expecting EINVAL exit status because 'first_data_pool'
+ # is already in use with 'first_fs'
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because data pool is already in use for 'first_fs'")
+
+ def test_fs_new_metadata_and_data_pool_in_use_by_another_same_fs(self):
+ """
+ That creating a file system with metadata and data pools that are already in use by the same fs fails.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ second_fs = "second_fs"
+
+ # try to create new fs 'second_fs' with following configuration
+ # metadata pool -> 'first_metadata_pool'
+ # data pool -> 'first_data_pool'
+ # Expecting EINVAL exit status because 'first_metadata_pool' and 'first_data_pool'
+ # is already in use with 'first_fs'
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'")
+
+ def test_fs_new_metadata_and_data_pool_in_use_by_different_fs(self):
+ """
+ That creating a file system with metadata and data pools that are already in use by different file systems fails.
+ """
+
+ # create first data pool, metadata pool and add with filesystem
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ # create second data pool, metadata pool and add with filesystem
+ second_fs = "second_fs"
+ second_metadata_pool = "second_metadata_pool"
+ second_data_pool = "second_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool)
+
+ third_fs = "third_fs"
+
+ # try to create new fs 'third_fs' with following configuration
+ # metadata pool -> 'first_metadata_pool'
+ # data pool -> 'second_data_pool'
+ # Expecting EINVAL exit status because 'first_metadata_pool' and 'second_data_pool'
+        # are already in use by 'first_fs' and 'second_fs' respectively
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'")
+
+ def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_same_fs(self):
+ """
+        That creating a file system with the metadata and data pools of an existing file system interchanged fails.
+ """
+
+        # create the first metadata and data pools and a file system using them
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+ second_fs = "second_fs"
+
+ # try to create new fs 'second_fs' with following configuration
+ # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs')
+ # data pool -> 'first_metadata_pool' (already used as metadata pool for 'first_fs')
+ # Expecting EINVAL exit status because 'first_data_pool' and 'first_metadata_pool'
+        # are already in use by 'first_fs'
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'")
+
+ def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_different_fs(self):
+ """
+        That creating a file system with interchanged metadata and data pools that are already in use by two different file systems fails.
+ """
+
+        # create the first metadata and data pools and a file system using them
+ first_fs = "first_fs"
+ first_metadata_pool = "first_metadata_pool"
+ first_data_pool = "first_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool)
+
+        # create the second metadata and data pools and a file system using them
+ second_fs = "second_fs"
+ second_metadata_pool = "second_metadata_pool"
+ second_data_pool = "second_data_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool)
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool)
+
+ third_fs = "third_fs"
+
+ # try to create new fs 'third_fs' with following configuration
+ # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs')
+ # data pool -> 'second_metadata_pool' (already used as metadata pool for 'second_fs')
+ # Expecting EINVAL exit status because 'first_data_pool' and 'second_metadata_pool'
+        # are already in use by 'first_fs' and 'second_fs' respectively
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'")
+
+ def test_fs_new_metadata_pool_already_in_use_with_rbd(self):
+ """
+        That creating a new file system with a metadata pool that is already in use by rbd fails.
+ """
+
+ # create pool and initialise with rbd
+ new_pool = "new_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool)
+ self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool])
+
+ new_fs = "new_fs"
+ new_data_pool = "new_data_pool"
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_data_pool)
+
+ # try to create new fs 'new_fs' with following configuration
+ # metadata pool -> 'new_pool' (already used by rbd app)
+ # data pool -> 'new_data_pool'
+        # Expecting EINVAL exit status because 'new_pool' is already in use by the 'rbd' application
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_pool, new_data_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because metadata pool is already in use for rbd")
+
+ def test_fs_new_data_pool_already_in_use_with_rbd(self):
+ """
+        That creating a new file system with a data pool that is already in use by rbd fails.
+ """
+
+ # create pool and initialise with rbd
+ new_pool = "new_pool"
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool)
+ self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool])
+
+ new_fs = "new_fs"
+ new_metadata_pool = "new_metadata_pool"
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_metadata_pool)
+
+ # try to create new fs 'new_fs' with following configuration
+ # metadata pool -> 'new_metadata_pool'
+ # data pool -> 'new_pool' (already used by rbd app)
+        # Expecting EINVAL exit status because 'new_pool' is already in use by the 'rbd' application
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ self.fail("Expected EINVAL because data pool is already in use for rbd")
+
+class TestRenameCommand(TestAdminCommands):
+ """
+ Tests for rename command.
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 2
+
+ def test_fs_rename(self):
+ """
+        That the file system can be renamed and that the application metadata on its pools is updated as expected.
+ """
+ # Renaming the file system breaks this mount as the client uses
+ # file system specific authorization. The client cannot read
+ # or write even if the client's cephx ID caps are updated to access
+ # the new file system name without the client being unmounted and
+ # re-mounted.
+ self.mount_a.umount_wait(require_clean=True)
+ orig_fs_name = self.fs.name
+ new_fs_name = 'new_cephfs'
+ client_id = 'test_new_cephfs'
+
+ self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it')
+
+ # authorize a cephx ID access to the renamed file system.
+ # use the ID to write to the file system.
+ self.fs.name = new_fs_name
+ keyring = self.fs.authorize(client_id, ('/', 'rw'))
+ keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
+ self.mount_a.remount(client_id=client_id,
+ client_keyring_path=keyring_path,
+ cephfs_mntpt='/',
+ cephfs_name=self.fs.name)
+ filedata, filename = 'some data on fs', 'file_on_fs'
+ filepath = os_path_join(self.mount_a.hostfs_mntpt, filename)
+ self.mount_a.write_file(filepath, filedata)
+ self.check_pool_application_metadata_key_value(
+ self.fs.get_data_pool_name(), 'cephfs', 'data', new_fs_name)
+ self.check_pool_application_metadata_key_value(
+ self.fs.get_metadata_pool_name(), 'cephfs', 'metadata', new_fs_name)
+
+ # cleanup
+ self.mount_a.umount_wait()
+ self.run_cluster_cmd(f'auth rm client.{client_id}')
+
+ def test_fs_rename_idempotency(self):
+ """
+ That the file system rename operation is idempotent.
+ """
+ # Renaming the file system breaks this mount as the client uses
+ # file system specific authorization.
+ self.mount_a.umount_wait(require_clean=True)
+ orig_fs_name = self.fs.name
+ new_fs_name = 'new_cephfs'
+
+ self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it')
+ self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it')
+
+ # original file system name does not appear in `fs ls` command
+ self.assertFalse(self.fs.exists())
+ self.fs.name = new_fs_name
+ self.assertTrue(self.fs.exists())
+
+ def test_fs_rename_fs_new_fails_with_old_fsname_existing_pools(self):
+ """
+ That after renaming a file system, creating a file system with
+ old name and existing FS pools fails.
+ """
+ # Renaming the file system breaks this mount as the client uses
+ # file system specific authorization.
+ self.mount_a.umount_wait(require_clean=True)
+ orig_fs_name = self.fs.name
+ new_fs_name = 'new_cephfs'
+ data_pool = self.fs.get_data_pool_name()
+ metadata_pool = self.fs.get_metadata_pool_name()
+ self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it')
+
+ try:
+ self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on creating a new file system with old "
+ "name and existing pools.")
+ else:
+ self.fail("expected creating new file system with old name and "
+ "existing pools to fail.")
+
+ try:
+ self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on creating a new file system with old "
+ "name, existing pools and --force flag.")
+ else:
+ self.fail("expected creating new file system with old name, "
+ "existing pools, and --force flag to fail.")
+
+ try:
+ self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} "
+ "--allow-dangerous-metadata-overlay")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on creating a new file system with old name, "
+ "existing pools and --allow-dangerous-metadata-overlay flag.")
+ else:
+ self.fail("expected creating new file system with old name, "
+ "existing pools, and --allow-dangerous-metadata-overlay flag to fail.")
+
+ def test_fs_rename_fails_without_yes_i_really_mean_it_flag(self):
+ """
+ That renaming a file system without '--yes-i-really-mean-it' flag fails.
+ """
+ try:
+ self.run_cluster_cmd(f"fs rename {self.fs.name} new_fs")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "invalid error code on renaming a file system without the "
+ "'--yes-i-really-mean-it' flag")
+ else:
+ self.fail("expected renaming of file system without the "
+ "'--yes-i-really-mean-it' flag to fail ")
+
+ def test_fs_rename_fails_for_non_existent_fs(self):
+ """
+ That renaming a non-existent file system fails.
+ """
+ try:
+ self.run_cluster_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on renaming a non-existent fs")
+ else:
+ self.fail("expected renaming of a non-existent file system to fail")
+
+ def test_fs_rename_fails_new_name_already_in_use(self):
+ """
+ That renaming a file system fails if the new name refers to an existing file system.
+ """
+ self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True)
+
+ try:
+ self.run_cluster_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on renaming to a fs name that is already in use")
+ else:
+ self.fail("expected renaming to a new file system name that is already in use to fail.")
+
+ def test_fs_rename_fails_with_mirroring_enabled(self):
+ """
+ That renaming a file system fails if mirroring is enabled on it.
+ """
+ orig_fs_name = self.fs.name
+ new_fs_name = 'new_cephfs'
+
+ self.run_cluster_cmd(f'fs mirror enable {orig_fs_name}')
+ try:
+ self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it')
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a mirrored file system")
+ else:
+ self.fail("expected renaming of a mirrored file system to fail")
+ self.run_cluster_cmd(f'fs mirror disable {orig_fs_name}')
+
+
+class TestDump(CephFSTestCase):
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 1
+
+ def test_fs_dump_epoch(self):
+ """
+ That dumping a specific epoch works.
+ """
+
+ status1 = self.fs.status()
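+        # ask explicitly for the previous epoch; it should still be retrievable
+        # and be exactly one epoch behind the current one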
+ status2 = self.fs.status(epoch=status1["epoch"]-1)
+ self.assertEqual(status1["epoch"], status2["epoch"]+1)
+
+ def test_fsmap_trim(self):
+ """
+ That the fsmap is trimmed normally.
+ """
+
+ paxos_service_trim_min = 25
+ self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min)
+ mon_max_mdsmap_epochs = 20
+ self.config_set('mon', 'mon_max_mdsmap_epochs', mon_max_mdsmap_epochs)
+
+ status = self.fs.status()
+ epoch = status["epoch"]
+
+ # for N mutations
+ mutations = paxos_service_trim_min + mon_max_mdsmap_epochs
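+        # each toggle of 'joinable' below creates a new FSMap epoch; after this
+        # many epochs the original epoch should fall out of the retention window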
+ b = False
+ for i in range(mutations):
+ self.fs.set_joinable(b)
+ b = not b
+
+ time.sleep(10) # for tick/compaction
+
+ try:
+ self.fs.status(epoch=epoch)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed")
+ else:
+ self.fail("trimming did not occur as expected")
+
+ def test_fsmap_force_trim(self):
+ """
+ That the fsmap is trimmed forcefully.
+ """
+
+ status = self.fs.status()
+ epoch = status["epoch"]
+
+ paxos_service_trim_min = 1
+ self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min)
+ mon_mds_force_trim_to = epoch+1
+ self.config_set('mon', 'mon_mds_force_trim_to', mon_mds_force_trim_to)
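+        # 'mon_mds_force_trim_to' makes the mons trim FSMap epochs up to the
+        # given epoch even if the normal retention window would keep them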
+
+ # force a new fsmap
+ self.fs.set_joinable(False)
+ time.sleep(10) # for tick/compaction
+
+ status = self.fs.status()
+ log.debug(f"new epoch is {status['epoch']}")
+ self.fs.status(epoch=epoch+1) # epoch+1 is not trimmed, may not == status["epoch"]
+
+ try:
+ self.fs.status(epoch=epoch)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed")
+ else:
+ self.fail("trimming did not occur as expected")
+
+
+class TestRequiredClientFeatures(CephFSTestCase):
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 1
+
+ def test_required_client_features(self):
+ """
+ That `ceph fs required_client_features` command functions.
+ """
+
+        def is_required(index):
+            out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty')
+            features = json.loads(out)['mdsmap']['required_client_features']
+            return "feature_{0}".format(index) in features
+
+ features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty'))
+        self.assertGreater(len(features), 0)
+
+ for f in features:
+ self.fs.required_client_features('rm', str(f['index']))
+
+ for f in features:
+ index = f['index']
+ feature = f['name']
+ if feature == 'reserved':
+ feature = str(index)
+
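+            # add only features whose index is not a multiple of 3, then remove
+            # only those with an odd index, leaving a mix of required and
+            # not-required features behind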
+ if index % 3 == 0:
+                continue
+ self.fs.required_client_features('add', feature)
+ self.assertTrue(is_required(index))
+
+ if index % 2 == 0:
+                continue
+ self.fs.required_client_features('rm', feature)
+ self.assertFalse(is_required(index))
+
+ def test_required_client_feature_add_reserved(self):
+ """
+ That `ceph fs required_client_features X add reserved` fails.
+ """
+
+ p = self.fs.required_client_features('add', 'reserved', check_status=False, stderr=StringIO())
+ self.assertIn('Invalid feature name', p.stderr.getvalue())
+
+ def test_required_client_feature_rm_reserved(self):
+ """
+ That `ceph fs required_client_features X rm reserved` fails.
+ """
+
+ p = self.fs.required_client_features('rm', 'reserved', check_status=False, stderr=StringIO())
+ self.assertIn('Invalid feature name', p.stderr.getvalue())
+
+ def test_required_client_feature_add_reserved_bit(self):
+ """
+ That `ceph fs required_client_features X add <reserved_bit>` passes.
+ """
+
+ p = self.fs.required_client_features('add', '1', stderr=StringIO())
+ self.assertIn("added feature 'reserved' to required_client_features", p.stderr.getvalue())
+
+ def test_required_client_feature_rm_reserved_bit(self):
+ """
+ That `ceph fs required_client_features X rm <reserved_bit>` passes.
+ """
+
+ self.fs.required_client_features('add', '1')
+ p = self.fs.required_client_features('rm', '1', stderr=StringIO())
+ self.assertIn("removed feature 'reserved' from required_client_features", p.stderr.getvalue())
+
+class TestCompatCommands(CephFSTestCase):
+ """
+ """
+
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 3
+
+ def test_add_compat(self):
+ """
+ Test adding a compat.
+ """
+
+ self.fs.fail()
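+        # feature bit 63 is assumed to be unused; 'placeholder' is an arbitrary name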
+ self.fs.add_compat(63, 'placeholder')
+ mdsmap = self.fs.get_mds_map()
+ self.assertIn("feature_63", mdsmap['compat']['compat'])
+
+ def test_add_incompat(self):
+ """
+ Test adding an incompat.
+ """
+
+ self.fs.fail()
+ self.fs.add_incompat(63, 'placeholder')
+ mdsmap = self.fs.get_mds_map()
+ log.info(f"{mdsmap}")
+ self.assertIn("feature_63", mdsmap['compat']['incompat'])
+
+ def test_rm_compat(self):
+ """
+ Test removing a compat.
+ """
+
+ self.fs.fail()
+ self.fs.add_compat(63, 'placeholder')
+ self.fs.rm_compat(63)
+ mdsmap = self.fs.get_mds_map()
+ self.assertNotIn("feature_63", mdsmap['compat']['compat'])
+
+ def test_rm_incompat(self):
+ """
+ Test removing an incompat.
+ """
+
+ self.fs.fail()
+ self.fs.add_incompat(63, 'placeholder')
+ self.fs.rm_incompat(63)
+ mdsmap = self.fs.get_mds_map()
+ self.assertNotIn("feature_63", mdsmap['compat']['incompat'])
+
+ def test_standby_compat(self):
+ """
+ That adding a compat does not prevent standbys from joining.
+ """
+
+ self.fs.fail()
+ self.fs.add_compat(63, "placeholder")
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ mdsmap = self.fs.get_mds_map()
+ self.assertIn("feature_63", mdsmap['compat']['compat'])
+
+ def test_standby_incompat_reject(self):
+ """
+ That adding an incompat feature prevents incompatible daemons from joining.
+ """
+
+ self.fs.fail()
+ self.fs.add_incompat(63, "placeholder")
+ self.fs.set_joinable()
+ try:
+ self.fs.wait_for_daemons(timeout=60)
+ except RuntimeError as e:
+ if "Timed out waiting for MDS daemons to become healthy" in str(e):
+ pass
+ else:
+ raise
+ else:
+ self.fail()
+
+ def test_standby_incompat_upgrade(self):
+ """
+ That an MDS can upgrade the compat of a fs.
+ """
+
+ self.fs.fail()
+ self.fs.rm_incompat(1)
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ mdsmap = self.fs.get_mds_map()
+ self.assertIn("feature_1", mdsmap['compat']['incompat'])
+
+ def test_standby_replay_not_upgradeable(self):
+ """
+ That the mons will not upgrade the MDSMap compat if standby-replay is
+ enabled.
+ """
+
+ self.fs.fail()
+ self.fs.rm_incompat(1)
+ self.fs.set_allow_standby_replay(True)
+ self.fs.set_joinable()
+ try:
+ self.fs.wait_for_daemons(timeout=60)
+ except RuntimeError as e:
+ if "Timed out waiting for MDS daemons to become healthy" in str(e):
+ pass
+ else:
+ raise
+ else:
+ self.fail()
+
+ def test_standby_incompat_reject_multifs(self):
+ """
+ Like test_standby_incompat_reject but with a second fs.
+ """
+
+ fs2 = self.mds_cluster.newfs(name="cephfs2", create=True)
+ fs2.fail()
+ fs2.add_incompat(63, 'placeholder')
+ fs2.set_joinable()
+ try:
+ fs2.wait_for_daemons(timeout=60)
+ except RuntimeError as e:
+ if "Timed out waiting for MDS daemons to become healthy" in str(e):
+ pass
+ else:
+ raise
+ else:
+ self.fail()
+ # did self.fs lose MDS or standbys suicide?
+ self.fs.wait_for_daemons()
+ mdsmap = fs2.get_mds_map()
+ self.assertIn("feature_63", mdsmap['compat']['incompat'])
+
+class TestConfigCommands(CephFSTestCase):
+ """
+ Test that daemons and clients respond to the otherwise rarely-used
+ runtime config modification operations.
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def test_ceph_config_show(self):
+ """
+ That I can successfully show MDS configuration.
+ """
+
+ names = self.fs.get_rank_names()
+ for n in names:
+ s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n)
+ self.assertTrue("NAME" in s)
+ self.assertTrue("mon_host" in s)
+
+
+ def test_client_config(self):
+ """
+ That I can successfully issue asok "config set" commands
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Test only applies to FUSE clients")
+
+ test_key = "client_cache_size"
+ test_val = "123"
+ self.mount_a.admin_socket(['config', 'set', test_key, test_val])
+ out = self.mount_a.admin_socket(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+
+ def test_mds_config_asok(self):
+ test_key = "mds_max_purge_ops"
+ test_val = "123"
+ self.fs.mds_asok(['config', 'set', test_key, test_val])
+ out = self.fs.mds_asok(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+ def test_mds_dump_cache_asok(self):
+ cache_file = "cache_file"
+ timeout = "1"
+ self.fs.rank_asok(['dump', 'cache', cache_file, timeout])
+
+ def test_mds_config_tell(self):
+ test_key = "mds_max_purge_ops"
+ test_val = "123"
+
+ self.fs.rank_tell(['injectargs', "--{0}={1}".format(test_key, test_val)])
+
+        # Read it back to verify the injected value took effect
+ out = self.fs.rank_tell(['config', 'get', test_key])
+ self.assertEqual(out[test_key], test_val)
+
+
+class TestMirroringCommands(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def _enable_mirroring(self, fs_name):
+ self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name)
+
+ def _disable_mirroring(self, fs_name):
+ self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name)
+
+ def _add_peer(self, fs_name, peer_spec, remote_fs_name):
+ peer_uuid = str(uuid.uuid4())
+ self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name)
+
+ def _remove_peer(self, fs_name, peer_uuid):
+ self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid)
+
+ def _verify_mirroring(self, fs_name, flag_str):
+ status = self.fs.status()
+ fs_map = status.get_fsmap_byname(fs_name)
+ if flag_str == 'enabled':
+ self.assertTrue('mirror_info' in fs_map)
+ elif flag_str == 'disabled':
+ self.assertTrue('mirror_info' not in fs_map)
+ else:
+ raise RuntimeError(f'invalid flag_str {flag_str}')
+
+ def _get_peer_uuid(self, fs_name, peer_spec):
+ status = self.fs.status()
+ fs_map = status.get_fsmap_byname(fs_name)
+ mirror_info = fs_map.get('mirror_info', None)
+ self.assertTrue(mirror_info is not None)
+ for peer_uuid, remote in mirror_info['peers'].items():
+ client_name = remote['remote']['client_name']
+ cluster_name = remote['remote']['cluster_name']
+ spec = f'{client_name}@{cluster_name}'
+ if spec == peer_spec:
+ return peer_uuid
+ return None
+
+ def test_mirroring_command(self):
+ """basic mirroring command test -- enable, disable mirroring on a
+ filesystem"""
+ self._enable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "enabled")
+ self._disable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "disabled")
+
+ def test_mirroring_peer_commands(self):
+ """test adding and removing peers to a mirror enabled filesystem"""
+ self._enable_mirroring(self.fs.name)
+ self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b")
+ self._add_peer(self.fs.name, "client.site-c@site-c", "fs_c")
+ self._verify_mirroring(self.fs.name, "enabled")
+ uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ uuid_peer_c = self._get_peer_uuid(self.fs.name, "client.site-c@site-c")
+ self.assertTrue(uuid_peer_b is not None)
+ self.assertTrue(uuid_peer_c is not None)
+ self._remove_peer(self.fs.name, uuid_peer_b)
+ self._remove_peer(self.fs.name, uuid_peer_c)
+ self._disable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "disabled")
+
+ def test_mirroring_command_idempotency(self):
+ """test to check idempotency of mirroring family of commands """
+ self._enable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "enabled")
+ self._enable_mirroring(self.fs.name)
+ # add peer
+ self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b")
+ uuid_peer_b1 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b1 is not None)
+ # adding the peer again should be idempotent
+ self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b")
+ uuid_peer_b2 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b2 is not None)
+ self.assertTrue(uuid_peer_b1 == uuid_peer_b2)
+ # remove peer
+ self._remove_peer(self.fs.name, uuid_peer_b1)
+ uuid_peer_b3 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b3 is None)
+ # removing the peer again should be idempotent
+ self._remove_peer(self.fs.name, uuid_peer_b1)
+ self._disable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "disabled")
+ self._disable_mirroring(self.fs.name)
+
+ def test_mirroring_disable_with_peers(self):
+ """test disabling mirroring for a filesystem with active peers"""
+ self._enable_mirroring(self.fs.name)
+ self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b")
+ self._verify_mirroring(self.fs.name, "enabled")
+ uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b is not None)
+ self._disable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "disabled")
+ # enable mirroring to check old peers
+ self._enable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "enabled")
+ # peer should be gone
+ uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b is None)
+ self._disable_mirroring(self.fs.name)
+ self._verify_mirroring(self.fs.name, "disabled")
+
+ def test_mirroring_with_filesystem_reset(self):
+ """test to verify mirroring state post filesystem reset"""
+ self._enable_mirroring(self.fs.name)
+ self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b")
+ self._verify_mirroring(self.fs.name, "enabled")
+ uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b")
+ self.assertTrue(uuid_peer_b is not None)
+ # reset filesystem
+ self.fs.fail()
+ self.fs.reset()
+ self.fs.wait_for_daemons()
+ self._verify_mirroring(self.fs.name, "disabled")
+
+
+class TestFsAuthorize(CephFSTestCase):
+ client_id = 'testuser'
+ client_name = 'client.' + client_id
+
+ def test_single_path_r(self):
+ PERM = 'r'
+ FS_AUTH_CAPS = (('/', PERM),)
+ self.captester = CapTester()
+ self.setup_test_env(FS_AUTH_CAPS)
+
+ self.captester.run_mon_cap_tests(self.fs, self.client_id)
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_single_path_rw(self):
+ PERM = 'rw'
+ FS_AUTH_CAPS = (('/', PERM),)
+ self.captester = CapTester()
+ self.setup_test_env(FS_AUTH_CAPS)
+
+ self.captester.run_mon_cap_tests(self.fs, self.client_id)
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_single_path_rootsquash(self):
+ PERM = 'rw'
+ FS_AUTH_CAPS = (('/', PERM, 'root_squash'),)
+ self.captester = CapTester()
+ self.setup_test_env(FS_AUTH_CAPS)
+
+ # testing MDS caps...
+ # Since root_squash is set in client caps, client can read but not
+        # write even though access level is set to "rw".
+ self.captester.conduct_pos_test_for_read_caps()
+ self.captester.conduct_neg_test_for_write_caps(sudo_write=True)
+
+ def test_single_path_authorize_on_nonalphanumeric_fsname(self):
+ """
+ That fs authorize command works on filesystems with names having [_.-]
+ characters
+ """
+ self.mount_a.umount_wait(require_clean=True)
+ self.mds_cluster.delete_all_filesystems()
+ fs_name = "cephfs-_."
+ self.fs = self.mds_cluster.newfs(name=fs_name)
+ self.fs.wait_for_daemons()
+ self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} '
+ f'mon "allow r" '
+ f'osd "allow rw pool={self.fs.get_data_pool_name()}" '
+ f'mds allow')
+ self.mount_a.remount(cephfs_name=self.fs.name)
+ PERM = 'rw'
+ FS_AUTH_CAPS = (('/', PERM),)
+ self.captester = CapTester()
+ self.setup_test_env(FS_AUTH_CAPS)
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_multiple_path_r(self):
+ PERM = 'r'
+ FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM))
+ for c in FS_AUTH_CAPS:
+ self.mount_a.run_shell(f'mkdir -p .{c[0]}')
+ self.captesters = (CapTester(), CapTester())
+ self.setup_test_env(FS_AUTH_CAPS)
+
+ self.run_cap_test_one_by_one(FS_AUTH_CAPS)
+
+ def test_multiple_path_rw(self):
+ PERM = 'rw'
+ FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM))
+ for c in FS_AUTH_CAPS:
+ self.mount_a.run_shell(f'mkdir -p .{c[0]}')
+ self.captesters = (CapTester(), CapTester())
+ self.setup_test_env(FS_AUTH_CAPS)
+
+ self.run_cap_test_one_by_one(FS_AUTH_CAPS)
+
+ def run_cap_test_one_by_one(self, fs_auth_caps):
+ keyring = self.run_cluster_cmd(f'auth get {self.client_name}')
+ for i, c in enumerate(fs_auth_caps):
+ self.assertIn(i, (0, 1))
+ PATH = c[0]
+ PERM = c[1]
+ self._remount(keyring, PATH)
+ # actual tests...
+ self.captesters[i].run_mon_cap_tests(self.fs, self.client_id)
+ self.captesters[i].run_mds_cap_tests(PERM, PATH)
+
+ def tearDown(self):
+ self.mount_a.umount_wait()
+ self.run_cluster_cmd(f'auth rm {self.client_name}')
+
+ super(type(self), self).tearDown()
+
+ def _remount(self, keyring, path='/'):
+ keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
+ self.mount_a.remount(client_id=self.client_id,
+ client_keyring_path=keyring_path,
+ cephfs_mntpt=path)
+
+ def setup_for_single_path(self, fs_auth_caps):
+ self.captester.write_test_files((self.mount_a,), '/')
+ keyring = self.fs.authorize(self.client_id, fs_auth_caps)
+ self._remount(keyring)
+
+ def setup_for_multiple_paths(self, fs_auth_caps):
+ for i, c in enumerate(fs_auth_caps):
+ PATH = c[0]
+ self.captesters[i].write_test_files((self.mount_a,), PATH)
+
+ self.fs.authorize(self.client_id, fs_auth_caps)
+
+ def setup_test_env(self, fs_auth_caps):
+ if len(fs_auth_caps) == 1:
+ self.setup_for_single_path(fs_auth_caps[0])
+ else:
+ self.setup_for_multiple_paths(fs_auth_caps)
+
+
+class TestAdminCommandIdempotency(CephFSTestCase):
+ """
+ Tests for administration command idempotency.
+ """
+
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 1
+
+ def test_rm_idempotency(self):
+ """
+        That removing a fs twice is idempotent.
+ """
+
+ data_pools = self.fs.get_data_pool_names(refresh=True)
+ self.fs.fail()
+ self.fs.rm()
+ try:
+ self.fs.get_mds_map()
+ except FSMissing:
+ pass
+ else:
+ self.fail("get_mds_map should raise")
+ p = self.fs.rm()
+ self.assertIn("does not exist", p.stderr.getvalue())
+ self.fs.remove_pools(data_pools)
+
+
+class TestAdminCommandDumpTree(CephFSTestCase):
+ """
+ Tests for administration command subtrees.
+ """
+
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 1
+
+ def test_dump_subtrees(self):
+ """
+ Dump all the subtrees to make sure the MDS daemon won't crash.
+ """
+
+ subtrees = self.fs.mds_asok(['get', 'subtrees'])
+ log.info(f"dumping {len(subtrees)} subtrees:")
+ for subtree in subtrees:
+ log.info(f" subtree: '{subtree['dir']['path']}'")
+ self.fs.mds_asok(['dump', 'tree', subtree['dir']['path']])
+
+ log.info("dumping 2 special subtrees:")
+ log.info(" subtree: '/'")
+ self.fs.mds_asok(['dump', 'tree', '/'])
+ log.info(" subtree: '~mdsdir'")
+ self.fs.mds_asok(['dump', 'tree', '~mdsdir'])
+
+class TestAdminCommandDumpLoads(CephFSTestCase):
+ """
+ Tests for administration command dump loads.
+ """
+
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 1
+
+ def test_dump_loads(self):
+ """
+        Make sure the depth limit param is considered when dumping loads for an MDS daemon.
+ """
+
+ log.info("dumping loads")
+ loads = self.fs.mds_asok(['dump', 'loads', '1'])
+ self.assertIsNotNone(loads)
+ self.assertIn("dirfrags", loads)
+ for d in loads["dirfrags"]:
+ self.assertLessEqual(d["path"].count("/"), 1)
+
+class TestFsBalRankMask(CephFSTestCase):
+ """
+ Tests ceph fs set <fs_name> bal_rank_mask
+ """
+
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 2
+
+ def test_bal_rank_mask(self):
+ """
+ check whether a specified bal_rank_mask value is valid or not.
+ """
+ bal_rank_mask = '0x0'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = '0'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = '-1'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = 'all'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = '0x1'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = '1'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = 'f0'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = 'ab'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = '0xfff0'
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ MAX_MDS = 256
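+        # a mask of MAX_MDS/4 hex digits (4 bits per digit) covers every possible rank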
+ bal_rank_mask = '0x' + 'f' * int(MAX_MDS / 4)
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask'))
+
+ bal_rank_mask = ''
+ log.info("set bal_rank_mask to empty string")
+ try:
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+
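+        # a mask wider than MAX_MDS bits should be rejected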
+ bal_rank_mask = '0x1' + 'f' * int(MAX_MDS / 4)
+ log.info(f"set bal_rank_mask {bal_rank_mask}")
+ try:
+ self.fs.set_bal_rank_mask(bal_rank_mask)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py
new file mode 100644
index 000000000..e6f0a8f0b
--- /dev/null
+++ b/qa/tasks/cephfs/test_auto_repair.py
@@ -0,0 +1,88 @@
+
+"""
+Exercise the MDS's auto repair functions
+"""
+
+import logging
+import time
+
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+
+class TestMDSAutoRepair(CephFSTestCase):
+ def test_backtrace_repair(self):
+ """
+ MDS should verify/fix backtrace on fetch dirfrag
+ """
+
+ self.mount_a.run_shell(["mkdir", "testdir1"])
+ self.mount_a.run_shell(["touch", "testdir1/testfile"])
+ dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1"))
+
+ # drop inodes caps
+ self.mount_a.umount_wait()
+
+ # flush journal entries to dirfrag objects, and expire journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # Restart the MDS to drop the metadata cache (because we expired the journal,
+ # nothing gets replayed into cache on restart)
+ self.fs.rank_fail()
+ self.fs.wait_for_daemons()
+
+ # remove testdir1's backtrace
+ self.fs.radosm(["rmxattr", dir_objname, "parent"])
+
+ # readdir (fetch dirfrag) should fix testdir1's backtrace
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["ls", "testdir1"])
+
+ # flush journal entries to dirfrag objects
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # check if backtrace exists
+ self.fs.radosm(["getxattr", dir_objname, "parent"])
+
+ def test_mds_readonly(self):
+ """
+        Test that the MDS behaves correctly when it is read-only.
+ """
+        # operations should succeed while the MDS is not read-only
+ self.mount_a.run_shell(["touch", "test_file1"])
+ writer = self.mount_a.write_background(loop=True)
+
+ time.sleep(10)
+ self.assertFalse(writer.finished)
+
+ # force MDS to read-only mode
+ self.fs.mds_asok(['force_readonly'])
+ time.sleep(10)
+
+ # touching test file should fail
+ try:
+ self.mount_a.run_shell(["touch", "test_file1"])
+ except CommandFailedError:
+ pass
+ else:
+            self.fail("expected touch on a read-only MDS to fail")
+
+ # background writer also should fail
+ self.assertTrue(writer.finished)
+
+ # The MDS should report its readonly health state to the mon
+ self.wait_for_health("MDS_READ_ONLY", timeout=30)
+
+ # restart mds to make it writable
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.wait_for_health_clear(timeout=30)
diff --git a/qa/tasks/cephfs/test_backtrace.py b/qa/tasks/cephfs/test_backtrace.py
new file mode 100644
index 000000000..6b094569b
--- /dev/null
+++ b/qa/tasks/cephfs/test_backtrace.py
@@ -0,0 +1,102 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.filesystem import ObjectNotFound
+
+class TestBacktrace(CephFSTestCase):
+ def test_backtrace(self):
+ """
+        That the 'parent', 'layout' and 'symlink' xattrs on the head objects of files
+ are updated correctly.
+ """
+
+ old_data_pool_name = self.fs.get_data_pool_name()
+ old_pool_id = self.fs.get_data_pool_id()
+
+ # Not enabling symlink recovery option should not store symlink xattr
+ self.config_set('mds', 'mds_symlink_recovery', 'false')
+ self.mount_a.run_shell(["mkdir", "sym_dir0"])
+ self.mount_a.run_shell(["touch", "sym_dir0/file1"])
+ self.mount_a.run_shell(["ln", "-s", "sym_dir0/file1", "sym_dir0/symlink_file1"])
+ file_ino = self.mount_a.path_to_ino("sym_dir0/symlink_file1", follow_symlinks=False)
+
+ self.fs.mds_asok(["flush", "journal"])
+ with self.assertRaises(ObjectNotFound):
+ self.fs.read_symlink(file_ino)
+
+ # Enabling symlink recovery option should store symlink xattr for symlinks
+ self.config_set('mds', 'mds_symlink_recovery', 'true')
+ self.mount_a.run_shell(["mkdir", "sym_dir"])
+ self.mount_a.run_shell(["touch", "sym_dir/file1"])
+ self.mount_a.run_shell(["ln", "-s", "./file1", "sym_dir/symlink_file1"])
+ file_ino = self.mount_a.path_to_ino("sym_dir/symlink_file1", follow_symlinks=False)
+
+ self.fs.mds_asok(["flush", "journal"])
+ symlink = self.fs.read_symlink(file_ino)
+ self.assertEqual(symlink, {
+ "s" : "./file1",
+ })
+
+ # Create a file for subsequent checks
+ self.mount_a.run_shell(["mkdir", "parent_a"])
+ self.mount_a.run_shell(["touch", "parent_a/alpha"])
+ file_ino = self.mount_a.path_to_ino("parent_a/alpha")
+
+ # That backtrace and layout are written after initial flush
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']])
+ layout = self.fs.read_layout(file_ino)
+ self.assertDictEqual(layout, {
+ "stripe_unit": 4194304,
+ "stripe_count": 1,
+ "object_size": 4194304,
+ "pool_id": old_pool_id,
+ "pool_ns": "",
+ })
+ self.assertEqual(backtrace['pool'], old_pool_id)
+
+ # That backtrace is written after parentage changes
+ self.mount_a.run_shell(["mkdir", "parent_b"])
+ self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"])
+
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']])
+
+ # Create a new data pool
+ new_pool_name = "data_new"
+ new_pool_id = self.fs.add_data_pool(new_pool_name)
+
+ # That an object which has switched pools gets its backtrace updated
+ self.mount_a.setfattr("./parent_b/alpha",
+ "ceph.file.layout.pool", new_pool_name)
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
+ self.assertEqual(backtrace_old_pool['pool'], new_pool_id)
+ backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
+ self.assertEqual(backtrace_new_pool['pool'], new_pool_id)
+ new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
+ self.assertEqual(new_pool_layout['pool_id'], new_pool_id)
+ self.assertEqual(new_pool_layout['pool_ns'], '')
+
+ # That subsequent linkage changes are only written to new pool backtrace
+ self.mount_a.run_shell(["mkdir", "parent_c"])
+ self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"])
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name)
+ self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']])
+ backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name)
+ self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']])
+
+ # That layout is written to new pool after change to other field in layout
+ self.mount_a.setfattr("./parent_c/alpha",
+ "ceph.file.layout.object_size", "8388608")
+
+ self.fs.mds_asok(["flush", "journal"])
+ new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name)
+ self.assertEqual(new_pool_layout['object_size'], 8388608)
+
+ # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough,
+ # we don't update the layout in all the old pools whenever it changes
+ old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name)
+ self.assertEqual(old_pool_layout['object_size'], 4194304)
diff --git a/qa/tasks/cephfs/test_cap_flush.py b/qa/tasks/cephfs/test_cap_flush.py
new file mode 100644
index 000000000..70fdc3893
--- /dev/null
+++ b/qa/tasks/cephfs/test_cap_flush.py
@@ -0,0 +1,58 @@
+
+import os
+import time
+from textwrap import dedent
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+class TestCapFlush(CephFSTestCase):
+ @for_teuthology
+ def test_replay_create(self):
+ """
+ MDS starts to handle client caps when it enters clientreplay stage.
+ When handling a client cap in clientreplay stage, it's possible that
+ corresponding inode does not exist because the client request which
+ creates inode hasn't been replayed.
+ """
+
+ dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
+ py_script = dedent("""
+ import os
+ os.mkdir("{0}")
+ fd = os.open("{0}", os.O_RDONLY)
+ os.fchmod(fd, 0o777)
+ os.fsync(fd)
+ """).format(dir_path)
+ self.mount_a.run_python(py_script)
+
+ self.fs.mds_asok(["flush", "journal"])
+
+ # client will only get unsafe replay
+ self.fs.mds_asok(["config", "set", "mds_log_pause", "1"])
+
+ file_name = "testfile"
+ file_path = dir_path + "/" + file_name
+
+ # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty
+ py_script = dedent("""
+ import os
+ os.chdir("{0}")
+ os.setgid(65534)
+ os.setuid(65534)
+ fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0o644)
+ os.fchmod(fd, 0o640)
+ """).format(dir_path, file_name)
+ self.mount_a.run_python(py_script, sudo=True)
+
+ # Modify file mode by different user. ceph-fuse will send a setattr request
+ self.mount_a.run_shell(["sudo", "chmod", "600", file_path], wait=False, omit_sudo=False)
+
+ time.sleep(10)
+
+ # Restart mds. Client will re-send the unsafe request and cap flush
+ self.fs.rank_fail()
+ self.fs.wait_for_daemons()
+
+ mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip()
+ # If the cap flush get dropped, mode should be 0644.
+ # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode)
+ self.assertEqual(mode, "600")
diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py
new file mode 100644
index 000000000..9f7434762
--- /dev/null
+++ b/qa/tasks/cephfs/test_cephfs_shell.py
@@ -0,0 +1,1167 @@
+"""
+NOTE: For running this tests locally (using vstart_runner.py), export the
+path to src/tools/cephfs/shell/cephfs-shell module to $PATH. Running
+"export PATH=$PATH:$(cd ../src/tools/cephfs/shell && pwd)" from the build dir
+will update the environment without hassles of typing the path correctly.
+"""
+from io import StringIO
+from os import path
+import crypt
+import logging
+from tempfile import mkstemp as tempfile_mkstemp
+import math
+from time import sleep
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from textwrap import dedent
+
+log = logging.getLogger(__name__)
+
+
+def humansize(nbytes):
+ suffixes = ['B', 'K', 'M', 'G', 'T', 'P']
+ i = 0
+ while nbytes >= 1024 and i < len(suffixes) - 1:
+ nbytes /= 1024.
+ i += 1
+ nbytes = math.ceil(nbytes)
+ f = ('%d' % nbytes).rstrip('.')
+ return '%s%s' % (f, suffixes[i])
+
+
+def ensure_str(s):
+ if isinstance(s, str):
+ return s
+ if isinstance(s, bytes):
+ return s.decode()
+ raise TypeError("not expecting type '%s'" % type(s))
+
+
+class TestCephFSShell(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+
+ def setUp(self):
+ super(TestCephFSShell, self).setUp()
+
+ conf_contents = "[cephfs-shell]\ncolors = False\ndebug = True\n"
+ confpath = self.mount_a.client_remote.sh('mktemp').strip()
+ self.mount_a.client_remote.write_file(confpath, conf_contents)
+ self.default_shell_conf_path = confpath
+
+ def run_cephfs_shell_cmd(self, cmd, mount_x=None, shell_conf_path=None,
+ opts=None, stdout=None, stderr=None, stdin=None,
+ check_status=True):
+ stdout = stdout or StringIO()
+ stderr = stderr or StringIO()
+ if mount_x is None:
+ mount_x = self.mount_a
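+        # the whole command is passed as a single string after '--' below,
+        # so collapse list-style commands into one string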
+ if isinstance(cmd, list):
+ cmd = " ".join(cmd)
+ if not shell_conf_path:
+ shell_conf_path = self.default_shell_conf_path
+
+ args = ["cephfs-shell", "-c", shell_conf_path]
+ if opts:
+ args += opts
+ args.extend(("--", cmd))
+
+ log.info("Running command: {}".format(" ".join(args)))
+ return mount_x.client_remote.run(args=args, stdout=stdout,
+ stderr=stderr, stdin=stdin,
+ check_status=check_status)
+
+ def negtest_cephfs_shell_cmd(self, **kwargs):
+ """
+ This method verifies that cephfs shell command fails with expected
+ return value and/or error message.
+
+        kwargs is expected to hold the same arguments as
+ run_cephfs_shell_cmd() with the following exceptions -
+ * It should not contain check_status (since commands are expected
+ to fail, check_status is hardcoded to False).
+ * It is optional to set expected error message and return value to
+ dict members 'errmsg' and 'retval' respectively.
+
+        This method serves as shorthand for code blocks like -
+
+        proc = self.run_cephfs_shell_cmd(cmd='some cmd',
+                                         check_status=False,
+                                         stdout=StringIO())
+        self.assertNotEqual(proc.returncode, 0)
+        self.assertIn('some error message',
+                      proc.stderr.getvalue().lower())
+ """
+ retval = kwargs.pop('retval', None)
+ errmsg = kwargs.pop('errmsg', None)
+ kwargs['check_status'] = False
+
+ proc = self.run_cephfs_shell_cmd(**kwargs)
+ if retval:
+ self.assertEqual(proc.returncode, retval)
+ else:
+ self.assertNotEqual(proc.returncode, 0)
+ if errmsg:
+ self.assertIn(errmsg, proc.stderr.getvalue().lower())
+
+ return proc
+
+ def get_cephfs_shell_cmd_output(self, cmd, mount_x=None,
+ shell_conf_path=None, opts=None,
+ stdout=None, stdin=None,
+ check_status=True):
+ return ensure_str(self.run_cephfs_shell_cmd(
+ cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+ opts=opts, stdout=stdout, stdin=stdin,
+ check_status=check_status).stdout.getvalue().strip())
+
+ def get_cephfs_shell_cmd_error(self, cmd, mount_x=None,
+ shell_conf_path=None, opts=None,
+ stderr=None, stdin=None, check_status=True):
+ return ensure_str(self.run_cephfs_shell_cmd(
+ cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+ opts=opts, stderr=stderr, stdin=stdin,
+ check_status=check_status).stderr.getvalue().strip())
+
+ def run_cephfs_shell_script(self, script, mount_x=None,
+ shell_conf_path=None, opts=None, stdout=None,
+ stderr=None, stdin=None, check_status=True):
+ stdout = stdout or StringIO()
+ stderr = stderr or StringIO()
+ if mount_x is None:
+ mount_x = self.mount_a
+
+ scriptpath = tempfile_mkstemp(prefix='test-cephfs', text=True)[1]
+ with open(scriptpath, 'w') as scriptfile:
+ scriptfile.write(script)
+ # copy script to the machine running cephfs-shell.
+ mount_x.client_remote.put_file(scriptpath, scriptpath)
+ mount_x.run_shell_payload(f"chmod 755 {scriptpath}")
+
+ args = ["cephfs-shell", '-b', scriptpath]
+ if shell_conf_path:
+ args[1:1] = ["-c", shell_conf_path]
+ log.info('Running script \"' + scriptpath + '\"')
+ return mount_x.client_remote.run(args=args, stdout=stdout,
+ stderr=stderr, stdin=stdin,
+                                         check_status=check_status)
+
+ def get_cephfs_shell_script_output(self, script, mount_x=None,
+ shell_conf_path=None, opts=None,
+ stdout=None, stdin=None,
+ check_status=True):
+ return ensure_str(self.run_cephfs_shell_script(
+ script=script, mount_x=mount_x, shell_conf_path=shell_conf_path,
+ opts=opts, stdout=stdout, stdin=stdin,
+ check_status=check_status).stdout.getvalue().strip())
+
+
+class TestGeneric(TestCephFSShell):
+
+ def test_mistyped_cmd(self):
+ with self.assertRaises(CommandFailedError) as cm:
+ self.run_cephfs_shell_cmd('lsx')
+ self.assertEqual(cm.exception.exitstatus, 127)
+
+
+class TestMkdir(TestCephFSShell):
+ def test_mkdir(self):
+ """
+ Test that mkdir creates directory
+ """
+ o = self.get_cephfs_shell_cmd_output("mkdir d1")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ o = self.mount_a.stat('d1')
+ log.info("mount_a output:\n{}".format(o))
+
+ def test_mkdir_with_070000_octal_mode(self):
+ """
+ Test that mkdir fails with octal mode greater than 07777
+ """
+ self.negtest_cephfs_shell_cmd(cmd="mkdir -m 070000 d2")
+ try:
+ self.mount_a.stat('d2')
+ except CommandFailedError:
+ pass
+
+ def test_mkdir_with_negative_octal_mode(self):
+ """
+ Test that mkdir fails with negative octal mode
+ """
+ self.negtest_cephfs_shell_cmd(cmd="mkdir -m -0755 d3")
+ try:
+ self.mount_a.stat('d3')
+ except CommandFailedError:
+ pass
+
+ def test_mkdir_with_non_octal_mode(self):
+ """
+ Test that mkdir passes with non-octal mode
+ """
+ o = self.get_cephfs_shell_cmd_output("mkdir -m u=rwx d4")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d4 should pass
+ o = self.mount_a.stat('d4')
+ assert ((o['st_mode'] & 0o700) == 0o700)
+
+ def test_mkdir_with_bad_non_octal_mode(self):
+ """
+        Test that mkdir fails with bad non-octal mode
+ """
+ self.negtest_cephfs_shell_cmd(cmd="mkdir -m ugx=0755 d5")
+ try:
+ self.mount_a.stat('d5')
+ except CommandFailedError:
+ pass
+
+ def test_mkdir_path_without_path_option(self):
+ """
+ Test that mkdir fails without path option for creating path
+ """
+ self.negtest_cephfs_shell_cmd(cmd="mkdir d5/d6/d7")
+ try:
+ self.mount_a.stat('d5/d6/d7')
+ except CommandFailedError:
+ pass
+
+ def test_mkdir_path_with_path_option(self):
+ """
+ Test that mkdir passes with path option for creating path
+ """
+ o = self.get_cephfs_shell_cmd_output("mkdir -p d5/d6/d7")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # mkdir d5/d6/d7 should pass
+ o = self.mount_a.stat('d5/d6/d7')
+ log.info("mount_a output:\n{}".format(o))
+
+
+class TestRmdir(TestCephFSShell):
+ dir_name = "test_dir"
+
+ def dir_does_not_exists(self):
+ """
+        Tests that the directory does not exist
+ """
+ try:
+ self.mount_a.stat(self.dir_name)
+ except CommandFailedError as e:
+ if e.exitstatus == 2:
+ return 0
+ raise
+
+ def test_rmdir(self):
+ """
+ Test that rmdir deletes directory
+ """
+ self.run_cephfs_shell_cmd("mkdir " + self.dir_name)
+ self.run_cephfs_shell_cmd("rmdir " + self.dir_name)
+ self.dir_does_not_exists()
+
+ def test_rmdir_non_existing_dir(self):
+ """
+        Test that rmdir does not delete a non-existent directory
+ """
+ self.negtest_cephfs_shell_cmd(cmd="rmdir test_dir")
+ self.dir_does_not_exists()
+
+ def test_rmdir_dir_with_file(self):
+ """
+ Test that rmdir does not delete directory containing file
+ """
+ self.run_cephfs_shell_cmd("mkdir " + self.dir_name)
+
+ self.run_cephfs_shell_cmd("put - test_dir/dumpfile", stdin="Valid File")
+ # see comment below
+ # with self.assertRaises(CommandFailedError) as cm:
+ with self.assertRaises(CommandFailedError):
+ self.run_cephfs_shell_cmd("rmdir " + self.dir_name)
+ # TODO: we need to check for exit code and error message as well.
+        # skipping it for now since error codes used by cephfs-shell are not
+ # standard and they may change soon.
+ # self.assertEqual(cm.exception.exitcode, 39)
+ self.mount_a.stat(self.dir_name)
+
+ def test_rmdir_existing_file(self):
+ """
+ Test that rmdir does not delete a file
+ """
+ self.run_cephfs_shell_cmd("put - dumpfile", stdin="Valid File")
+ self.negtest_cephfs_shell_cmd(cmd="rmdir dumpfile")
+ self.mount_a.stat("dumpfile")
+
+ def test_rmdir_p(self):
+ """
+ Test that rmdir -p deletes all empty directories in the root
+ directory passed
+ """
+ self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3")
+ self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name)
+ self.dir_does_not_exists()
+
+ def test_rmdir_p_valid_path(self):
+ """
+ Test that rmdir -p deletes all empty directories in the path passed
+ """
+ self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3")
+ self.run_cephfs_shell_cmd("rmdir -p test_dir/t1/t2/t3")
+ self.dir_does_not_exists()
+
+ def test_rmdir_p_non_existing_dir(self):
+ """
+        Test that rmdir -p does not delete a non-existent directory
+ """
+ self.negtest_cephfs_shell_cmd(cmd="rmdir -p test_dir")
+ self.dir_does_not_exists()
+
+ def test_rmdir_p_dir_with_file(self):
+ """
+ Test that rmdir -p does not delete the directory containing a file
+ """
+ self.run_cephfs_shell_cmd("mkdir " + self.dir_name)
+ self.run_cephfs_shell_cmd("put - test_dir/dumpfile",
+ stdin="Valid File")
+ self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name)
+ self.mount_a.stat(self.dir_name)
+
+
+class TestLn(TestCephFSShell):
+ dir1 = 'test_dir1'
+ dir2 = 'test_dir2'
+ dump_id = 11
+ s = 'somedata'
+ dump_file = 'dump11'
+
+ def test_soft_link_without_link_name(self):
+ self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}')
+ self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}',
+ data=self.s)
+ self.run_cephfs_shell_script(script=dedent(f'''
+ cd /{self.dir1}/{self.dir2}
+ ln -s ../{self.dump_file}'''))
+ o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}'
+ f'/{self.dump_file}')
+ self.assertEqual(self.s, o)
+
+ def test_soft_link_with_link_name(self):
+ self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}')
+ self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}',
+ data=self.s)
+ self.run_cephfs_shell_cmd(f'ln -s /{self.dir1}/{self.dump_file} '
+ f'/{self.dir1}/{self.dir2}/')
+ o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}'
+ f'/{self.dump_file}')
+ self.assertEqual(self.s, o)
+
+ def test_hard_link_without_link_name(self):
+ self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}')
+ self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}',
+ data=self.s)
+ self.run_cephfs_shell_script(script=dedent(f'''
+ cd /{self.dir1}/{self.dir2}
+ ln ../{self.dump_file}'''))
+ o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}'
+ f'/{self.dump_file}')
+ self.assertEqual(self.s, o)
+
+ def test_hard_link_with_link_name(self):
+ self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}')
+ self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}',
+ data=self.s)
+ self.run_cephfs_shell_cmd(f'ln /{self.dir1}/{self.dump_file} '
+ f'/{self.dir1}/{self.dir2}/')
+ o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}'
+ f'/{self.dump_file}')
+ self.assertEqual(self.s, o)
+
+ def test_hard_link_to_dir_not_allowed(self):
+ self.run_cephfs_shell_cmd(f'mkdir {self.dir1}')
+ self.run_cephfs_shell_cmd(f'mkdir {self.dir2}')
+ r = self.run_cephfs_shell_cmd(f'ln /{self.dir1} /{self.dir2}/',
+ check_status=False)
+ self.assertEqual(r.returncode, 3)
+
+ def test_target_exists_in_dir(self):
+ self.mount_a.write_file(path=f'{self.dump_file}', data=self.s)
+ r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} {self.dump_file}',
+ check_status=False)
+ self.assertEqual(r.returncode, 1)
+
+ def test_incorrect_dir(self):
+ self.mount_a.write_file(path=f'{self.dump_file}', data=self.s)
+ r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} /dir1/',
+ check_status=False)
+ self.assertEqual(r.returncode, 5)
+
+
+class TestGetAndPut(TestCephFSShell):
+ def test_get_with_target_name(self):
+ """
+ Test that get passes with target name
+ """
+ s = 'C' * 1024
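+        # crypt() with a fixed salt is used here only as a cheap checksum for
+        # comparing the data before and after the round trip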
+ s_hash = crypt.crypt(s, '.A')
+ o = self.get_cephfs_shell_cmd_output("put - dump4", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put - dump4 should pass
+ o = self.mount_a.stat('dump4')
+ log.info("mount_a output:\n{}".format(o))
+
+ o = self.get_cephfs_shell_cmd_output("get dump4 ./dump4")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # NOTE: cwd=None because we want to run it at CWD, not at cephfs mntpt.
+ o = self.mount_a.run_shell('cat dump4', cwd=None).stdout.getvalue(). \
+ strip()
+ o_hash = crypt.crypt(o, '.A')
+
+ # s_hash must be equal to o_hash
+ log.info("s_hash:{}".format(s_hash))
+ log.info("o_hash:{}".format(o_hash))
+ assert (s_hash == o_hash)
+
+ # cleanup
+ self.mount_a.run_shell("rm dump4", cwd=None, check_status=False)
+
+ def test_get_without_target_name(self):
+ """
+ Test that get fails when no target name is given
+ """
+ s = 'Somedata'
+ # put - dump5 should pass
+ self.get_cephfs_shell_cmd_output("put - dump5", stdin=s)
+
+ self.mount_a.stat('dump5')
+
+ # get dump5 should fail as there is no local_path mentioned
+ with self.assertRaises(CommandFailedError):
+ self.get_cephfs_shell_cmd_output("get dump5")
+
+ # stat dump5 would return non-zero exit code as get dump5 failed
+ # cwd=None because we want to run it at CWD, not at cephfs mntpt.
+ r = self.mount_a.run_shell('stat dump5', cwd=None,
+ check_status=False).returncode
+ self.assertEqual(r, 1)
+
+ def test_get_doesnt_create_dir(self):
+ # if the get command created subdirs on its own, dump7 would be
+ # stored as ./dump7/tmp/dump7 instead of ./dump7. In that case
+ # `cat ./dump7` would return a non-zero exit code (i.e. 1),
+ # implying that no such file exists at that location.
+ dir_abspath = path.join(self.mount_a.mountpoint, 'tmp')
+ self.mount_a.run_shell_payload(f"mkdir {dir_abspath}")
+ self.mount_a.client_remote.write_file(path.join(dir_abspath, 'dump7'),
+ 'somedata')
+ self.get_cephfs_shell_cmd_output("get /tmp/dump7 ./dump7")
+ # test that dump7 exists
+ self.mount_a.run_shell("cat ./dump7", cwd=None)
+
+ # cleanup
+ self.mount_a.run_shell(args='rm dump7', cwd=None, check_status=False)
+
+ def test_get_to_console(self):
+ """
+ Test that get can write the file contents to the console ("-" as target)
+ """
+ s = 'E' * 1024
+ s_hash = crypt.crypt(s, '.A')
+ o = self.get_cephfs_shell_cmd_output("put - dump6", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # put - dump6 should pass
+ o = self.mount_a.stat('dump6')
+ log.info("mount_a output:\n{}".format(o))
+
+ # get dump6 - should pass
+ o = self.get_cephfs_shell_cmd_output("get dump6 -")
+ o_hash = crypt.crypt(o, '.A')
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # s_hash must be equal to o_hash
+ log.info("s_hash:{}".format(s_hash))
+ log.info("o_hash:{}".format(o_hash))
+ assert (s_hash == o_hash)
+
+ def test_put_without_target_name(self):
+ """
+ put - should fail since the command requires both arguments (the target name is missing).
+ """
+ with self.assertRaises(CommandFailedError):
+ self.get_cephfs_shell_cmd_output("put -")
+
+ def test_put_validate_local_path(self):
+ """
+ This test makes sure that local_path is validated before the file
+ is copied from the local fs to cephfs; the command
+ `put ./dumpXYZ dump8` must fail since ./dumpXYZ doesn't exist.
+ """
+ with self.assertRaises(CommandFailedError):
+ o = self.get_cephfs_shell_cmd_output("put ./dumpXYZ dump8")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+
+class TestSnapshots(TestCephFSShell):
+ def test_snap(self):
+ """
+ Test that snapshot creation and deletion work
+ """
+ sd = self.fs.get_config('client_snapdir')
+ sdn = "data_dir/{}/snap1".format(sd)
+
+ # create a data dir and dump some files into it
+ self.get_cephfs_shell_cmd_output("mkdir data_dir")
+ s = 'A' * 10240
+ o = self.get_cephfs_shell_cmd_output("put - data_dir/data_a", stdin=s)
+ s = 'B' * 10240
+ o = self.get_cephfs_shell_cmd_output("put - data_dir/data_b", stdin=s)
+ s = 'C' * 10240
+ o = self.get_cephfs_shell_cmd_output("put - data_dir/data_c", stdin=s)
+ s = 'D' * 10240
+ o = self.get_cephfs_shell_cmd_output("put - data_dir/data_d", stdin=s)
+ s = 'E' * 10240
+ o = self.get_cephfs_shell_cmd_output("put - data_dir/data_e", stdin=s)
+
+ o = self.get_cephfs_shell_cmd_output("ls -l /data_dir")
+ log.info("cephfs-shell output:\n{}".format(o))
+
+ # create the snapshot - must pass
+ o = self.get_cephfs_shell_cmd_output("snap create snap1 /data_dir")
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.assertEqual("", o)
+ o = self.mount_a.stat(sdn)
+ log.info("mount_a output:\n{}".format(o))
+ self.assertIn('st_mode', o)
+
+ # create the same snapshot again - must fail with an error message
+ self.negtest_cephfs_shell_cmd(cmd="snap create snap1 /data_dir",
+ errmsg="snapshot 'snap1' already exists")
+ o = self.mount_a.stat(sdn)
+ log.info("mount_a output:\n{}".format(o))
+ self.assertIn('st_mode', o)
+
+ # delete the snapshot - must pass
+ o = self.get_cephfs_shell_cmd_output("snap delete snap1 /data_dir")
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.assertEqual("", o)
+ try:
+ o = self.mount_a.stat(sdn)
+ except CommandFailedError:
+ # snap dir should not exist anymore; note that 'o' keeps the
+ # (empty) output of the preceding "snap delete" command, so the
+ # assertion below still passes
+ pass
+ log.info("mount_a output:\n{}".format(o))
+ self.assertNotIn('st_mode', o)
+
+ # delete the same snapshot again - must fail with an error message
+ self.negtest_cephfs_shell_cmd(cmd="snap delete snap1 /data_dir",
+ errmsg="'snap1': no such snapshot")
+ try:
+ o = self.mount_a.stat(sdn)
+ except CommandFailedError:
+ pass
+ log.info("mount_a output:\n{}".format(o))
+ self.assertNotIn('st_mode', o)
+
+
+class TestCD(TestCephFSShell):
+ CLIENTS_REQUIRED = 1
+
+ def test_cd_with_no_args(self):
+ """
+ Test that when cd is issued without any arguments, CWD is changed
+ to root directory.
+ """
+ path = 'dir1/dir2/dir3'
+ self.mount_a.run_shell_payload(f"mkdir -p {path}")
+ expected_cwd = '/'
+
+ script = 'cd {}\ncd\ncwd\n'.format(path)
+ output = self.get_cephfs_shell_script_output(script)
+ self.assertEqual(output, expected_cwd)
+
+ def test_cd_with_args(self):
+ """
+ Test that when cd is issued with an argument, CWD is changed
+ to the path passed in the argument.
+ """
+ path = 'dir1/dir2/dir3'
+ self.mount_a.run_shell_payload(f"mkdir -p {path}")
+ expected_cwd = '/dir1/dir2/dir3'
+
+ script = 'cd {}\ncwd\n'.format(path)
+ output = self.get_cephfs_shell_script_output(script)
+ self.assertEqual(output, expected_cwd)
+
+
+class TestDU(TestCephFSShell):
+ CLIENTS_REQUIRED = 1
+
+ def test_du_works_for_regfiles(self):
+ regfilename = 'some_regfile'
+ regfile_abspath = path.join(self.mount_a.mountpoint, regfilename)
+ self.mount_a.client_remote.write_file(regfile_abspath, 'somedata')
+
+ size = humansize(self.mount_a.stat(regfile_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", regfilename)
+
+ du_output = self.get_cephfs_shell_cmd_output('du ' + regfilename)
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_works_for_non_empty_dirs(self):
+ dirname = 'some_directory'
+ dir_abspath = path.join(self.mount_a.mountpoint, dirname)
+ regfilename = 'some_regfile'
+ regfile_abspath = path.join(dir_abspath, regfilename)
+ self.mount_a.run_shell_payload(f"mkdir {dir_abspath}")
+ self.mount_a.client_remote.write_file(regfile_abspath, 'somedata')
+
+ # XXX: we stat `regfile_abspath` here because ceph du reports a
+ # non-empty directory's size as the sum of sizes of all files
+ # under it.
+ size = humansize(self.mount_a.stat(regfile_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", dirname)
+
+ sleep(10)
+ du_output = self.get_cephfs_shell_cmd_output('du ' + dirname)
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_works_for_empty_dirs(self):
+ dirname = 'some_directory'
+ dir_abspath = path.join(self.mount_a.mountpoint, dirname)
+ self.mount_a.run_shell_payload(f"mkdir {dir_abspath}")
+
+ size = humansize(self.mount_a.stat(dir_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", dirname)
+
+ du_output = self.get_cephfs_shell_cmd_output('du ' + dirname)
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_works_for_hardlinks(self):
+ regfilename = 'some_regfile'
+ regfile_abspath = path.join(self.mount_a.mountpoint, regfilename)
+ self.mount_a.client_remote.write_file(regfile_abspath, 'somedata')
+ hlinkname = 'some_hardlink'
+ hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname)
+ self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}")
+
+ size = humansize(self.mount_a.stat(hlink_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", hlinkname)
+
+ du_output = self.get_cephfs_shell_cmd_output('du ' + hlinkname)
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_works_for_softlinks_to_files(self):
+ regfilename = 'some_regfile'
+ regfile_abspath = path.join(self.mount_a.mountpoint, regfilename)
+ self.mount_a.client_remote.write_file(regfile_abspath, 'somedata')
+ slinkname = 'some_softlink'
+ slink_abspath = path.join(self.mount_a.mountpoint, slinkname)
+ self.mount_a.run_shell_payload(
+ f"ln -s {regfile_abspath} {slink_abspath}")
+
+ size = humansize(self.mount_a.lstat(slink_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", slinkname)
+
+ du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname)
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_works_for_softlinks_to_dirs(self):
+ dirname = 'some_directory'
+ dir_abspath = path.join(self.mount_a.mountpoint, dirname)
+ self.mount_a.run_shell_payload(f"mkdir {dir_abspath}")
+ slinkname = 'some_softlink'
+ slink_abspath = path.join(self.mount_a.mountpoint, slinkname)
+ self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink_abspath}")
+
+ size = humansize(self.mount_a.lstat(slink_abspath)['st_size'])
+ expected_output = r'{}{}{}'.format(size, " +", slinkname)
+
+ du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname)
+ self.assertRegex(du_output, expected_output)
+
+ # NOTE: tests using this helper are pretty slow since this method
+ # sleeps for 20 seconds
+ def _setup_files(self, return_path_to_files=False, path_prefix='./'):
+ dirname = 'dir1'
+ regfilename = 'regfile'
+ hlinkname = 'hlink'
+ slinkname = 'slink1'
+ slink2name = 'slink2'
+
+ dir_abspath = path.join(self.mount_a.mountpoint, dirname)
+ regfile_abspath = path.join(self.mount_a.mountpoint, regfilename)
+ hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname)
+ slink_abspath = path.join(self.mount_a.mountpoint, slinkname)
+ slink2_abspath = path.join(self.mount_a.mountpoint, slink2name)
+
+ self.mount_a.run_shell_payload(f"mkdir {dir_abspath}")
+ self.mount_a.run_shell_payload(f"touch {regfile_abspath}")
+ self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}")
+ self.mount_a.run_shell_payload(
+ f"ln -s {regfile_abspath} {slink_abspath}")
+ self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink2_abspath}")
+
+ dir2_name = 'dir2'
+ dir21_name = 'dir21'
+ regfile121_name = 'regfile121'
+ dir2_abspath = path.join(self.mount_a.mountpoint, dir2_name)
+ dir21_abspath = path.join(dir2_abspath, dir21_name)
+ regfile121_abspath = path.join(dir21_abspath, regfile121_name)
+ self.mount_a.run_shell_payload(f"mkdir -p {dir21_abspath}")
+ self.mount_a.run_shell_payload(f"touch {regfile121_abspath}")
+
+ self.mount_a.client_remote.write_file(regfile_abspath, 'somedata')
+ self.mount_a.client_remote.write_file(regfile121_abspath,
+ 'somemoredata')
+
+ # TODO: is there a way to trigger/force update ceph.dir.rbytes?
+ # wait so that attr ceph.dir.rbytes gets a chance to be updated.
+ sleep(20)
+
+ expected_patterns = []
+ path_to_files = []
+
+ def append_expected_output_pattern(f):
+ if f == '/':
+ expected_patterns.append(r'{}{}{}'.format(size, " +", '.' + f))
+ else:
+ expected_patterns.append(r'{}{}{}'.format(
+ size, " +",
+ path_prefix + path.relpath(f, self.mount_a.mountpoint)))
+
+ for f in [dir_abspath, regfile_abspath, regfile121_abspath,
+ hlink_abspath, slink_abspath, slink2_abspath]:
+ size = humansize(self.mount_a.stat(
+ f, follow_symlinks=False)['st_size'])
+ append_expected_output_pattern(f)
+
+ # get size for directories containing regfiles within
+ for f in [dir2_abspath, dir21_abspath]:
+ size = humansize(self.mount_a.stat(regfile121_abspath,
+ follow_symlinks=False)[
+ 'st_size'])
+ append_expected_output_pattern(f)
+
+ # get size for CephFS root
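+ # the hard link shares regfile's inode, so it is not added again;
+ # directories contribute only via the files beneath them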
+ size = 0
+ for f in [regfile_abspath, regfile121_abspath, slink_abspath,
+ slink2_abspath]:
+ size += self.mount_a.stat(f, follow_symlinks=False)['st_size']
+ size = humansize(size)
+ append_expected_output_pattern('/')
+
+ if return_path_to_files:
+ for p in [dir_abspath, regfile_abspath, dir2_abspath,
+ dir21_abspath, regfile121_abspath, hlink_abspath,
+ slink_abspath, slink2_abspath]:
+ path_to_files.append(path.relpath(p, self.mount_a.mountpoint))
+
+ return expected_patterns, path_to_files
+ else:
+ return expected_patterns
+
+ def test_du_works_recursively_with_no_path_in_args(self):
+ expected_patterns_in_output = self._setup_files()
+ du_output = self.get_cephfs_shell_cmd_output('du -r')
+
+ for expected_output in expected_patterns_in_output:
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_with_path_in_args(self):
+ expected_patterns_in_output, path_to_files = self._setup_files(
+ True, path_prefix='')
+
+ args = ['du', '/']
+ for p in path_to_files:
+ args.append(p)
+ du_output = self.get_cephfs_shell_cmd_output(args)
+
+ for expected_output in expected_patterns_in_output:
+ self.assertRegex(du_output, expected_output)
+
+ def test_du_with_no_args(self):
+ expected_patterns_in_output = self._setup_files()
+
+ du_output = self.get_cephfs_shell_cmd_output('du')
+
+ for expected_output in expected_patterns_in_output:
+ # Since CWD is CephFS root and being non-recursive expect only
+ # CWD in DU report.
+ if expected_output.find('/') == len(expected_output) - 1:
+ self.assertRegex(du_output, expected_output)
+
+
+class TestDF(TestCephFSShell):
+ def validate_df(self, filename):
+ df_output = self.get_cephfs_shell_cmd_output('df ' + filename)
+ log.info("cephfs-shell df output:\n{}".format(df_output))
+
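+ # skip the header line; the columns compared below are total
+ # 1K-blocks, used and available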
+ shell_df = df_output.splitlines()[1].split()
+
+ block_size = int(self.mount_a.df()["total"]) // 1024
+ log.info("cephfs df block size output:{}\n".format(block_size))
+
+ st_size = int(self.mount_a.stat(filename)["st_size"])
+ log.info("cephfs stat used output:{}".format(st_size))
+ log.info("cephfs available:{}\n".format(block_size - st_size))
+
+ self.assertTupleEqual((block_size, st_size, block_size - st_size),
+ (int(shell_df[0]), int(shell_df[1]),
+ int(shell_df[2])))
+
+ def test_df_with_no_args(self):
+ expected_output = ''
+ df_output = self.get_cephfs_shell_cmd_output('df')
+ assert df_output == expected_output
+
+ def test_df_for_valid_directory(self):
+ dir_name = 'dir1'
+ mount_output = self.mount_a.run_shell_payload(f"mkdir {dir_name}")
+ log.info("cephfs-shell mount output:\n{}".format(mount_output))
+ self.validate_df(dir_name)
+
+ def test_df_for_invalid_directory(self):
+ dir_abspath = path.join(self.mount_a.mountpoint, 'non-existent-dir')
+ self.negtest_cephfs_shell_cmd(cmd='df ' + dir_abspath,
+ errmsg='error in stat')
+
+ def test_df_for_valid_file(self):
+ s = 'df test' * 14145016
+ o = self.get_cephfs_shell_cmd_output("put - dumpfile", stdin=s)
+ log.info("cephfs-shell output:\n{}".format(o))
+ self.validate_df("dumpfile")
+
+
+class TestQuota(TestCephFSShell):
+ dir_name = 'testdir'
+
+ def create_dir(self):
+ mount_output = self.get_cephfs_shell_cmd_output(
+ 'mkdir ' + self.dir_name)
+ log.info("cephfs-shell mount output:\n{}".format(mount_output))
+
+ def set_and_get_quota_vals(self, input_val, check_status=True):
+ self.run_cephfs_shell_cmd(['quota', 'set', '--max_bytes',
+ input_val[0], '--max_files', input_val[1],
+ self.dir_name], check_status=check_status)
+
+ quota_output = self.get_cephfs_shell_cmd_output(
+ ['quota', 'get', self.dir_name],
+ check_status=check_status)
+
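+ # tokens 1 and 3 of the `quota get` output hold the max_bytes and
+ # max_files values respectively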
+ quota_output = quota_output.split()
+ return quota_output[1], quota_output[3]
+
+ def test_set(self):
+ self.create_dir()
+ set_values = ('6', '2')
+ self.assertTupleEqual(self.set_and_get_quota_vals(set_values),
+ set_values)
+
+ def test_replace_values(self):
+ self.test_set()
+ set_values = ('20', '4')
+ self.assertTupleEqual(self.set_and_get_quota_vals(set_values),
+ set_values)
+
+ def test_set_invalid_dir(self):
+ set_values = ('5', '5')
+ try:
+ self.assertTupleEqual(self.set_and_get_quota_vals(
+ set_values, False), set_values)
+ raise Exception(
+ "Something went wrong!! Values set for non existing directory")
+ except IndexError:
+ # Test should pass as values cannot be set for non
+ # existing directory
+ pass
+
+ def test_set_invalid_values(self):
+ self.create_dir()
+ set_values = ('-6', '-5')
+ try:
+ self.assertTupleEqual(self.set_and_get_quota_vals(set_values,
+ False),
+ set_values)
+ raise Exception("Something went wrong!! Invalid values set")
+ except IndexError:
+ # Test should pass as invalid values cannot be set
+ pass
+
+ def test_exceed_file_limit(self):
+ self.test_set()
+ dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name)
+ self.mount_a.run_shell_payload(f"touch {dir_abspath}/file1")
+ file2 = path.join(dir_abspath, "file2")
+ try:
+ self.mount_a.run_shell_payload(f"touch {file2}")
+ raise Exception(
+ "Something went wrong!! File creation should have failed")
+ except CommandFailedError:
+ # Test should pass as file quota set to 2
+ # Additional condition to confirm file creation failure
+ if not path.exists(file2):
+ return 0
+ raise
+
+ def test_exceed_write_limit(self):
+ self.test_set()
+ dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name)
+ filename = 'test_file'
+ file_abspath = path.join(dir_abspath, filename)
+ try:
+ # Write should fail as bytes quota is set to 6
+ self.mount_a.client_remote.write_file(file_abspath,
+ 'Disk raise Exception')
+ raise Exception("Write should have failed")
+ except CommandFailedError:
+ # Test should pass only when write command fails
+ path_exists = path.exists(file_abspath)
+ if not path_exists:
+ # Testing with teuthology: No file is created.
+ return 0
+ elif path_exists and not path.getsize(file_abspath):
+ # Testing on Fedora 30: When write fails, empty
+ # file gets created.
+ return 0
+ else:
+ raise
+
+
+class TestXattr(TestCephFSShell):
+ dir_name = 'testdir'
+
+ def create_dir(self):
+ self.run_cephfs_shell_cmd('mkdir ' + self.dir_name)
+
+ def set_get_list_xattr_vals(self, input_val, negtest=False):
+ setxattr_output = self.get_cephfs_shell_cmd_output(
+ ['setxattr', self.dir_name, input_val[0], input_val[1]])
+ log.info("cephfs-shell setxattr output:\n{}".format(setxattr_output))
+
+ getxattr_output = self.get_cephfs_shell_cmd_output(
+ ['getxattr', self.dir_name, input_val[0]])
+ log.info("cephfs-shell getxattr output:\n{}".format(getxattr_output))
+
+ listxattr_output = self.get_cephfs_shell_cmd_output(
+ ['listxattr', self.dir_name])
+ log.info("cephfs-shell listxattr output:\n{}".format(listxattr_output))
+
+ return listxattr_output, getxattr_output
+
+ def test_set(self):
+ self.create_dir()
+ set_values = ('user.key', '2')
+ self.assertTupleEqual(self.set_get_list_xattr_vals(set_values),
+ set_values)
+
+ def test_reset(self):
+ self.test_set()
+ set_values = ('user.key', '4')
+ self.assertTupleEqual(self.set_get_list_xattr_vals(set_values),
+ set_values)
+
+ def test_non_existing_dir(self):
+ input_val = ('user.key', '9')
+ self.negtest_cephfs_shell_cmd(
+ cmd=['setxattr', self.dir_name, input_val[0],
+ input_val[1]])
+ self.negtest_cephfs_shell_cmd(
+ cmd=['getxattr', self.dir_name, input_val[0]])
+ self.negtest_cephfs_shell_cmd(cmd=['listxattr', self.dir_name])
+
+
+class TestLS(TestCephFSShell):
+ dir_name = 'test_dir'
+ hidden_dir_name = '.test_hidden_dir'
+
+ def test_ls(self):
+ """ Test that ls prints files in CWD. """
+ self.run_cephfs_shell_cmd(f'mkdir {self.dir_name}')
+
+ ls_output = self.get_cephfs_shell_cmd_output("ls")
+ log.info(f"output of ls command:\n{ls_output}")
+
+ self.assertIn(self.dir_name, ls_output)
+
+ def test_ls_a(self):
+ """ Test ls -a prints hidden files in CWD."""
+
+ self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}')
+
+ ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a'])
+ log.info(f"output of ls -a command:\n{ls_a_output}")
+
+ self.assertIn(self.hidden_dir_name, ls_a_output)
+
+ def test_ls_does_not_print_hidden_dir(self):
+ """ Test ls command does not print hidden directory """
+
+ self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}')
+
+ ls_output = self.get_cephfs_shell_cmd_output("ls")
+ log.info(f"output of ls command:\n{ls_output}")
+
+ self.assertNotIn(self.hidden_dir_name, ls_output)
+
+ def test_ls_a_prints_non_hidden_dir(self):
+ """ Test ls -a command prints non hidden directory """
+
+ self.run_cephfs_shell_cmd(
+ f'mkdir {self.hidden_dir_name} {self.dir_name}')
+
+ ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a'])
+ log.info(f"output of ls -a command:\n{ls_a_output}")
+
+ self.assertIn(self.dir_name, ls_a_output)
+
+ def test_ls_H_prints_human_readable_file_size(self):
+ """ Test "ls -lH" prints human readable file size."""
+
+ file_sizes = ['1', '1K', '1M', '1G']
+ file_names = ['dump1', 'dump2', 'dump3', 'dump4']
+
+ for (file_size, file_name) in zip(file_sizes, file_names):
+ temp_file = self.mount_a.client_remote.mktemp(file_name)
+ self.mount_a.run_shell_payload(
+ f"fallocate -l {file_size} {temp_file}")
+ self.mount_a.run_shell_payload(f'mv {temp_file} ./')
+
+ ls_H_output = self.get_cephfs_shell_cmd_output(['ls', '-lH'])
+
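+ # the human-readable size is expected in the 2nd column of each line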
+ ls_H_file_size = set()
+ for line in ls_H_output.split('\n'):
+ ls_H_file_size.add(line.split()[1])
+
+ # test that file sizes are in human readable format
+ self.assertEqual({'1B', '1K', '1M', '1G'}, ls_H_file_size)
+
+ def test_ls_s_sort_by_size(self):
+ """ Test "ls -S" sorts file listing by file_size """
+ test_file1 = "test_file1.txt"
+ test_file2 = "test_file2.txt"
+ file1_content = 'A' * 102
+ file2_content = 'B' * 10
+
+ self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content)
+ self.run_cephfs_shell_cmd(f"write {test_file2}", stdin=file2_content)
+
+ ls_s_output = self.get_cephfs_shell_cmd_output(['ls', '-lS'])
+
+ file_sizes = []
+ for line in ls_s_output.split('\n'):
+ file_sizes.append(line.split()[1])
+
+ # test that file size are in ascending order
+ self.assertEqual(file_sizes, sorted(file_sizes))
+
+
+class TestMisc(TestCephFSShell):
+ def test_issue_cephfs_shell_cmd_at_invocation(self):
+ """
+ Test that `cephfs-shell -c conf cmd` works.
+ """
+ # choosing a long name since short ones have a higher probability
+ # of getting matched by coincidence.
+ dirname = 'somedirectory'
+ self.run_cephfs_shell_cmd(['mkdir', dirname])
+
+ output = self.mount_a.client_remote.sh(['cephfs-shell', 'ls']). \
+ strip()
+
+ self.assertRegex(output, dirname)
+
+ def test_help(self):
+ """
+ Test that help outputs commands.
+ """
+ o = self.get_cephfs_shell_cmd_output("help all")
+ log.info("output:\n{}".format(o))
+
+ def test_chmod(self):
+ """Test chmod is allowed above o0777 """
+
+ test_file1 = "test_file2.txt"
+ file1_content = 'A' * 102
+ self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content)
+ self.run_cephfs_shell_cmd(f"chmod 01777 {test_file1}")
+
+
+class TestShellOpts(TestCephFSShell):
+ """
+ Contains tests for shell options from conf file and shell prompt.
+ """
+
+ def setUp(self):
+ super().setUp()
+
+ # output of following command -
+ # editor - was: 'vim'
+ # now: '?'
+ # editor: '?'
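+ # capture the current editor value and strip the quotes around it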
+ self.editor_val = self.get_cephfs_shell_cmd_output(
+ 'set editor ?, set editor').split('\n')[2]
+ self.editor_val = self.editor_val.split(':')[1]. \
+ replace("'", "", 2).strip()
+
+ def write_tempconf(self, confcontents):
+ self.tempconfpath = self.mount_a.client_remote.mktemp(
+ suffix='cephfs-shell.conf')
+ self.mount_a.client_remote.write_file(self.tempconfpath,
+ confcontents)
+
+ def test_reading_conf(self):
+ self.write_tempconf("[cephfs-shell]\neditor = ???")
+
+ # output of following command -
+ # CephFS:~/>>> set editor
+ # editor: 'vim'
+ final_editor_val = self.get_cephfs_shell_cmd_output(
+ cmd='set editor', shell_conf_path=self.tempconfpath)
+ final_editor_val = final_editor_val.split(': ')[1]
+ final_editor_val = final_editor_val.replace("'", "", 2)
+
+ self.assertNotEqual(self.editor_val, final_editor_val)
+
+ def test_reading_conf_with_dup_opt(self):
+ """
+ Test reading a conf file that contains a duplicate option.
+ """
+ self.write_tempconf("[cephfs-shell]\neditor = ???\neditor = " +
+ self.editor_val)
+
+ # output of following command -
+ # CephFS:~/>>> set editor
+ # editor: 'vim'
+ final_editor_val = self.get_cephfs_shell_cmd_output(
+ cmd='set editor', shell_conf_path=self.tempconfpath)
+ final_editor_val = final_editor_val.split(': ')[1]
+ final_editor_val = final_editor_val.replace("'", "", 2)
+
+ self.assertEqual(self.editor_val, final_editor_val)
+
+ def test_setting_opt_after_reading_conf(self):
+ self.write_tempconf("[cephfs-shell]\neditor = ???")
+
+ # output of following command -
+ # editor - was: vim
+ # now: vim
+ # editor: vim
+ final_editor_val = self.get_cephfs_shell_cmd_output(
+ cmd='set editor %s, set editor' % self.editor_val,
+ shell_conf_path=self.tempconfpath)
+ final_editor_val = final_editor_val.split('\n')[2]
+ final_editor_val = final_editor_val.split(': ')[1]
+ final_editor_val = final_editor_val.replace("'", "", 2)
+
+ self.assertEqual(self.editor_val, final_editor_val)
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
new file mode 100644
index 000000000..c4215df33
--- /dev/null
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -0,0 +1,397 @@
+
+"""
+Exercise the MDS's behaviour when clients and the MDCache reach or
+exceed the limits of how many caps/inodes they should hold.
+"""
+
+import logging
+from textwrap import dedent
+from tasks.ceph_test_case import TestTimeoutError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
+from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
+import os
+from io import StringIO
+
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+# Hardcoded values from Server::recall_client_state
+CAP_RECALL_RATIO = 0.8
+CAP_RECALL_MIN = 100
+
+
+class TestClientLimits(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ def _test_client_pin(self, use_subdir, open_files):
+ """
+ When a client pins an inode in its cache, for example because the file is held open,
+ it should reject requests from the MDS to trim these caps. The MDS should complain
+ to the user that it is unable to enforce its cache size limits because of this
+ objectionable client.
+
+ :param use_subdir: whether to put test files in a subdir or use root
+ :param open_files: number of files to hold open during the test
+ """
+
+ # Set MDS cache memory limit to a low value that will make the MDS to
+ # ask the client to trim the caps.
+ cache_memory_limit = "1K"
+
+ self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
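+ # make cap recall aggressive: recall at most open_files/2 caps at a
+ # time and lower the recall warning threshold to open_files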
+ self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
+ self.config_set('mds', 'mds_recall_warning_threshold', open_files)
+
+ mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
+ self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client)
+ mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client"))
+ mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
+ self.assertGreaterEqual(open_files, mds_min_caps_per_client)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ path = "subdir" if use_subdir else "."
+ open_proc = self.mount_a.open_n_background(path, open_files)
+
+ # Client should now hold:
+ # `open_files` caps for the open files
+ # 1 cap for root
+ # 1 cap for subdir
+ self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
+ open_files + (2 if use_subdir else 1),
+ timeout=600,
+ reject_fn=lambda x: x > open_files + 2)
+
+ # MDS should not be happy about that, as the client is failing to comply
+ # with the SESSION_RECALL messages it is being sent
+ self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
+
+ # We can also test that the MDS health warning for oversized
+ # cache is functioning as intended.
+ self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
+
+ # When the client closes the files, it should retain only as many caps as allowed
+ # under the SESSION_RECALL policy
+ log.info("Terminating process holding files open")
+ self.mount_a._kill_background(open_proc)
+
+ # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message,
+ # which depend on the caps outstanding, cache size and overall ratio
+ def expected_caps():
+ num_caps = self.get_session(mount_a_client_id)['num_caps']
+ if num_caps <= mds_min_caps_per_client:
+ return True
+ elif num_caps <= mds_max_caps_per_client:
+ return True
+ else:
+ return False
+
+ self.wait_until_true(expected_caps, timeout=60)
+
+ @needs_trimming
+ def test_client_pin_root(self):
+ self._test_client_pin(False, 400)
+
+ @needs_trimming
+ def test_client_pin(self):
+ self._test_client_pin(True, 800)
+
+ @needs_trimming
+ def test_client_pin_mincaps(self):
+ self._test_client_pin(True, 200)
+
+ def test_client_min_caps_working_set(self):
+ """
+ When a client has inodes pinned in its cache (open files), that the MDS
+ will not warn about the client not responding to cache pressure when
+ the number of caps is below mds_min_caps_working_set.
+ """
+
+ # Set MDS cache memory limit to a low value that will make the MDS to
+ # ask the client to trim the caps.
+ cache_memory_limit = "1K"
+ open_files = 400
+
+ self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit)
+ self.config_set('mds', 'mds_recall_max_caps', int(open_files/2))
+ self.config_set('mds', 'mds_recall_warning_threshold', open_files)
+ self.config_set('mds', 'mds_min_caps_working_set', open_files*2)
+
+ mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
+ mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate"))
+ self.assertGreaterEqual(open_files, mds_min_caps_per_client)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.mount_a.open_n_background("subdir", open_files)
+
+ # Client should now hold:
+ # `open_files` caps for the open files
+ # 1 cap for root
+ # 1 cap for subdir
+ self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'],
+ open_files + 2,
+ timeout=600,
+ reject_fn=lambda x: x > open_files + 2)
+
+ # We can also test that the MDS health warning for oversized
+ # cache is functioning as intended.
+ self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2)
+
+ try:
+ # MDS should not be happy about that but it's not sending
+ # MDS_CLIENT_RECALL warnings because the client's caps are below
+ # mds_min_caps_working_set.
+ self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2)
+ except TestTimeoutError:
+ pass
+ else:
+ raise RuntimeError("expected no client recall warning")
+
+ def test_cap_acquisition_throttle_readdir(self):
+ """
+ Mostly, readdir acquires caps faster than the MDS recalls them, so cap
+ acquisition via readdir is throttled by retrying the readdir after
+ a fraction of a second (0.5s by default) when the throttling condition is met.
+ """
+
+ subdir_count = 4
+ files_per_dir = 25
+
+ # set the throttle low enough that reading two directories already exceeds it.
+ throttle_value = (files_per_dir * 3) // 2
+
+ # activate throttling logic by setting max per client to a low value
+ self.config_set('mds', 'mds_max_caps_per_client', 1)
+ self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value)
+
+ # Create files split across {subdir_count} directories, {files_per_dir} in each dir
+ for i in range(1, subdir_count+1):
+ self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # recursive readdir. macOS wants an explicit directory for `find`.
+ proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO())
+ # return code may be None if the command got interrupted
+ self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue())
+
+ # validate that the throttle condition was hit at least once
+ cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
+ self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
+
+ # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value
+ # plus one batch that could have been taken immediately before querying
+ # assuming the batch is equal to the per dir file count.
+ cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+ self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value)
+
+ # make sure that the throttle was reported in the events
+ def historic_ops_have_event(expected_event):
+ ops_dump = self.fs.rank_tell(['dump_historic_ops'])
+ # reverse the events and the ops assuming that later ops would be throttled
+ for op in reversed(ops_dump['ops']):
+ for ev in reversed(op.get('type_data', {}).get('events', [])):
+ if ev['event'] == expected_event:
+ return True
+ return False
+
+ self.assertTrue(historic_ops_have_event('cap_acquisition_throttle'))
+
+ def test_client_release_bug(self):
+ """
+ When a client has a bug (which we will simulate) preventing it from releasing caps,
+ the MDS should notice that releases are not being sent promptly, and generate a health
+ metric to that effect.
+ """
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to inject client release failure")
+
+ self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true')
+ self.mount_a.teardown()
+ self.mount_a.mount_wait()
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Client A creates a file. It will hold the write caps on the file, and later (simulated bug) fail
+ # to comply with the MDS's request to release that cap
+ self.mount_a.run_shell(["touch", "file1"])
+
+ # Client B tries to write to the file that client A created
+ rproc = self.mount_b.write_background("file1")
+
+ # After session_timeout, we should see a health warning (extra lag from
+ # MDS beacon period)
+ session_timeout = self.fs.get_var("session_timeout")
+ self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)
+
+ # Client B should still be stuck
+ self.assertFalse(rproc.finished)
+
+ # Kill client A
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Client B should complete
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ rproc.wait()
+
+ def test_client_blocklisted_oldest_tid(self):
+ """
+ That a client is blocklisted when its encoded session metadata exceeds the
+ configured threshold (due to an ever growing `completed_requests` list caused
+ by an unidentified bug in the client or the MDS).
+ """
+
+ # num of requests client issues
+ max_requests = 10000
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to inject client release failure")
+
+ self.config_set('client', 'client inject fixed oldest tid', 'true')
+ self.mount_a.teardown()
+ self.mount_a.mount_wait()
+
+ self.config_set('mds', 'mds_max_completed_requests', max_requests)
+
+ # Create lots of files
+ self.mount_a.create_n_files("testdir/file1", max_requests + 100)
+
+ # Create a few files synchronously. This makes sure previous requests are completed
+ self.mount_a.create_n_files("testdir/file2", 5, True)
+
+ # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
+ self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id))
+
+ # set the threshold low so that it has a high probability of
+ # hitting.
+ self.config_set('mds', 'mds_session_metadata_threshold', 5000)
+
+ # Create a large number of files synchronously. This should hit the session metadata threshold
+ # causing the client to get blocklisted.
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.create_n_files("testdir/file2", 100000, True)
+
+ self.assertTrue(self.mds_cluster.is_addr_blocklisted(
+ self.mount_a.get_global_addr()))
+ # the mds should bump up the relevant perf counter
+ pd = self.perf_dump()
+ self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0)
+
+ # reset the config
+ self.config_set('client', 'client inject fixed oldest tid', 'false')
+
+ self.mount_a.kill_cleanup()
+ self.mount_a.mount_wait()
+
+ def test_client_oldest_tid(self):
+ """
+ When a client does not advance its oldest tid, the MDS should notice that
+ and generate health warnings.
+ """
+
+ # num of requests client issues
+ max_requests = 1000
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to inject client release failure")
+
+ self.set_conf('client', 'client inject fixed oldest tid', 'true')
+ self.mount_a.teardown()
+ self.mount_a.mount_wait()
+
+ self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)])
+
+ # Create lots of files
+ self.mount_a.create_n_files("testdir/file1", max_requests + 100)
+
+ # Create a few files synchronously. This makes sure previous requests are completed
+ self.mount_a.create_n_files("testdir/file2", 5, True)
+
+ # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
+ self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)
+
+ def _test_client_cache_size(self, mount_subdir):
+ """
+ check if client invalidate kernel dcache according to its cache size config
+ """
+
+ # The debug hook to inject the failure only exists in the fuse client
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to inject client release failure")
+
+ if mount_subdir:
+ # fuse assigns a fixed inode number (1) to the root inode. But when
+ # mounting a subdir, the actual inode number of the root is not 1.
+ # This mismatch confuses fuse_lowlevel_notify_inval_entry() when
+ # invalidating dentries in the root directory.
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.umount_wait()
+ self.set_conf('client', 'client mountpoint', '/subdir')
+ self.mount_a.mount_wait()
+ root_ino = self.mount_a.path_to_ino(".")
+ self.assertEqual(root_ino, 1)
+
+ dir_path = os.path.join(self.mount_a.mountpoint, "testdir")
+
+ mkdir_script = dedent("""
+ import os
+ os.mkdir("{path}")
+ for n in range(0, {num_dirs}):
+ os.mkdir("{path}/dir{{0}}".format(n))
+ """)
+
+ num_dirs = 1000
+ self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs))
+ self.mount_a.run_shell(["sync"])
+
+ dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
+ self.assertGreaterEqual(dentry_count, num_dirs)
+ self.assertGreaterEqual(dentry_pinned_count, num_dirs)
+
+ cache_size = num_dirs // 10
+ self.mount_a.set_cache_size(cache_size)
+
+ def trimmed():
+ dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count()
+ log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format(
+ dentry_count, dentry_pinned_count
+ ))
+ if dentry_count > cache_size or dentry_pinned_count > cache_size:
+ return False
+
+ return True
+
+ self.wait_until_true(trimmed, 30)
+
+ @needs_trimming
+ def test_client_cache_size(self):
+ self._test_client_cache_size(False)
+ self._test_client_cache_size(True)
+
+ def test_client_max_caps(self):
+ """
+ That the MDS will not let a client sit above mds_max_caps_per_client caps.
+ """
+
+ mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client"))
+ mds_max_caps_per_client = 2*mds_min_caps_per_client
+ self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client)
+
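+ # create far more files than the cap limit so the client initially
+ # exceeds it and the MDS has to recall caps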
+ self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ def expected_caps():
+ num_caps = self.get_session(mount_a_client_id)['num_caps']
+ if num_caps <= mds_max_caps_per_client:
+ return True
+ else:
+ return False
+
+ self.wait_until_true(expected_caps, timeout=60)
diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py
new file mode 100644
index 000000000..1bd6884a9
--- /dev/null
+++ b/qa/tasks/cephfs/test_client_recovery.py
@@ -0,0 +1,757 @@
+
+"""
+Teuthology task for exercising CephFS client recovery
+"""
+
+import logging
+from textwrap import dedent
+import time
+import distutils.version as version
+import random
+import re
+import string
+import os
+
+from teuthology.orchestra import run
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.packaging import get_package_version
+
+log = logging.getLogger(__name__)
+
+
+# Arbitrary timeouts for operations involving restarting
+# an MDS or waiting for it to come up
+MDS_RESTART_GRACE = 60
+
+
+class TestClientNetworkRecovery(CephFSTestCase):
+ REQUIRE_ONE_CLIENT_REMOTE = True
+ CLIENTS_REQUIRED = 2
+
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
+
+ # Environment references
+ mds_reconnect_timeout = None
+ ms_max_backoff = None
+
+ def test_network_death(self):
+ """
+ Simulate software freeze or temporary network failure.
+
+ Check that the client blocks I/O during failure, and completes
+ I/O after failure.
+ """
+
+ session_timeout = self.fs.get_var("session_timeout")
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+
+ # We only need one client
+ self.mount_b.umount_wait()
+
+ # Initially our one client session should be visible
+ client_id = self.mount_a.get_global_id()
+ ls_data = self._session_list()
+ self.assert_session_count(1, ls_data)
+ self.assertEqual(ls_data[0]['id'], client_id)
+ self.assert_session_state(client_id, "open")
+
+ # ...and capable of doing I/O without blocking
+ self.mount_a.create_files()
+
+ # ...but if we turn off the network
+ self.fs.set_clients_block(True)
+
+ # ...and try and start an I/O
+ write_blocked = self.mount_a.write_background()
+
+ # ...then it should block
+ self.assertFalse(write_blocked.finished)
+ self.assert_session_state(client_id, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+ self.assertFalse(write_blocked.finished)
+ self.assert_session_state(client_id, "stale")
+
+ # ...until we re-enable I/O
+ self.fs.set_clients_block(False)
+
+ # ...when it should complete promptly
+ a = time.time()
+ self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2)
+ write_blocked.wait() # Already know we're finished, wait() to raise exception on errors
+ recovery_time = time.time() - a
+ log.info("recovery time: {0}".format(recovery_time))
+ self.assert_session_state(client_id, "open")
+
+
+class TestClientRecovery(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
+
+ # Environment references
+ mds_reconnect_timeout = None
+ ms_max_backoff = None
+
+ def test_basic(self):
+ # Check that two clients come up healthy and see each others' files
+ # =====================================================
+ self.mount_a.create_files()
+ self.mount_a.check_files()
+ self.mount_a.umount_wait()
+
+ self.mount_b.check_files()
+
+ self.mount_a.mount_wait()
+
+ # Check that the admin socket interface is correctly reporting
+ # two sessions
+ # =====================================================
+ ls_data = self._session_list()
+ self.assert_session_count(2, ls_data)
+
+ self.assertSetEqual(
+ set([l['id'] for l in ls_data]),
+ {self.mount_a.get_global_id(), self.mount_b.get_global_id()}
+ )
+
+ def test_restart(self):
+ # Check that after an MDS restart both clients reconnect and continue
+ # to handle I/O
+ # =====================================================
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ self.mount_a.create_destroy()
+ self.mount_b.create_destroy()
+
+ def _session_num_caps(self, client_id):
+ ls_data = self.fs.mds_asok(['session', 'ls'])
+ return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps'])
+
+ def test_reconnect_timeout(self):
+ # Reconnect timeout
+ # =================
+ # Check that if I stop an MDS and a client goes away, the MDS waits
+ # for the reconnect period
+
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ self.fs.fail()
+
+ self.mount_a.umount_wait(force=True)
+
+ self.fs.set_joinable()
+
+ self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
+ # Check that the MDS locally reports its state correctly
+ status = self.fs.mds_asok(['status'])
+ self.assertIn("reconnect_status", status)
+
+ ls_data = self._session_list()
+ self.assert_session_count(2, ls_data)
+
+ # The session for the dead client should have the 'reconnect' flag set
+ self.assertTrue(self.get_session(mount_a_client_id)['reconnecting'])
+
+ # Wait for the reconnect state to clear, this should take the
+ # reconnect timeout period.
+ in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2)
+ # Check that the period we waited to enter active is within a factor
+ # of two of the reconnect timeout.
+ self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2,
+ "Should have been in reconnect phase for {0} but only took {1}".format(
+ self.mds_reconnect_timeout, in_reconnect_for
+ ))
+
+ self.assert_session_count(1)
+
+ # Check that the client that timed out during reconnect can
+ # mount again and do I/O
+ self.mount_a.mount_wait()
+ self.mount_a.create_destroy()
+
+ self.assert_session_count(2)
+
+ def test_reconnect_eviction(self):
+ # Eviction during reconnect
+ # =========================
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ self.fs.fail()
+
+ # The mount goes away while the MDS is offline
+ self.mount_a.kill()
+
+ # wait for it to die
+ time.sleep(5)
+
+ self.fs.set_joinable()
+
+ # Enter reconnect phase
+ self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE)
+ self.assert_session_count(2)
+
+ # Evict the stuck client
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ self.assert_session_count(1)
+
+ # Observe that we proceed to active phase without waiting full reconnect timeout
+ evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+ # Once we evict the troublemaker, the reconnect phase should complete
+ # in well under the reconnect timeout.
+ self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5,
+ "reconnect did not complete soon enough after eviction, took {0}".format(
+ evict_til_active
+ ))
+
+ # We killed earlier so must clean up before trying to use again
+ self.mount_a.kill_cleanup()
+
+ # Bring the client back
+ self.mount_a.mount_wait()
+ self.mount_a.create_destroy()
+
+ def _test_stale_caps(self, write):
+ session_timeout = self.fs.get_var("session_timeout")
+
+ # Capability release from stale session
+ # =====================================
+ if write:
+ content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
+ cap_holder = self.mount_a.open_background(content=content)
+ else:
+ content = ''
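+ # create the file, then remount so that the caps from the create are
+ # dropped and only read caps are acquired by the open below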
+ self.mount_a.run_shell(["touch", "background_file"])
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+ cap_holder = self.mount_a.open_background(write=False)
+
+ self.assert_session_count(2)
+ mount_a_gid = self.mount_a.get_global_id()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible(size=len(content))
+
+ # Simulate client death
+ self.mount_a.suspend_netns()
+
+ # wait for it to die so it doesn't voluntarily release buffer cap
+ time.sleep(5)
+
+ try:
+ # Now, after session_timeout seconds, the waiter should
+ # complete their operation when the MDS marks the holder's
+ # session stale.
+ cap_waiter = self.mount_b.write_background()
+ a = time.time()
+ cap_waiter.wait()
+ b = time.time()
+
+ # Should have succeeded
+ self.assertEqual(cap_waiter.exitstatus, 0)
+
+ if write:
+ self.assert_session_count(1)
+ else:
+ self.assert_session_state(mount_a_gid, "stale")
+
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+ self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0,
+ "Capability handover took {0}, expected approx {1}".format(
+ cap_waited, session_timeout
+ ))
+ finally:
+ self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+ self.mount_a._kill_background(cap_holder)
+
+ def test_stale_read_caps(self):
+ self._test_stale_caps(False)
+
+ def test_stale_write_caps(self):
+ self._test_stale_caps(True)
+
+ def test_evicted_caps(self):
+ # Eviction while holding a capability
+ # ===================================
+
+ session_timeout = self.fs.get_var("session_timeout")
+
+ # Take out a write capability on a file on client A,
+ # and then immediately kill it.
+ cap_holder = self.mount_a.open_background()
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible()
+
+ # Simulate client death
+ self.mount_a.suspend_netns()
+
+ # wait for it to die so it doesn't voluntarily release buffer cap
+ time.sleep(5)
+
+ try:
+ # The waiter should get stuck waiting for the capability
+ # held on the MDS by the now-dead client A
+ cap_waiter = self.mount_b.write_background()
+ time.sleep(5)
+ self.assertFalse(cap_waiter.finished)
+
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ # Now, because I evicted the old holder of the capability, it should
+ # immediately get handed over to the waiter
+ a = time.time()
+ cap_waiter.wait()
+ b = time.time()
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+ # This is the check that it happened 'now' rather than waiting
+ # for the session timeout
+ self.assertLess(cap_waited, session_timeout / 2.0,
+ "Capability handover took {0}, expected less than {1}".format(
+ cap_waited, session_timeout / 2.0
+ ))
+
+ finally:
+ self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable
+ self.mount_a._kill_background(cap_holder)
+
+ def test_trim_caps(self):
+ # Trim capability when reconnecting MDS
+ # ===================================
+
+ count = 500
+ # Create lots of files
+ for i in range(count):
+ self.mount_a.run_shell(["touch", "f{0}".format(i)])
+
+ # Populate mount_b's cache
+ self.mount_b.run_shell(["ls", "-l"])
+
+ client_id = self.mount_b.get_global_id()
+ num_caps = self._session_num_caps(client_id)
+ self.assertGreaterEqual(num_caps, count)
+
+ # Restart MDS. client should trim its cache when reconnecting to the MDS
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ num_caps = self._session_num_caps(client_id)
+ self.assertLess(num_caps, count,
+ "should have less than {0} capabilities, have {1}".format(
+ count, num_caps
+ ))
+
+ def _is_flockable(self):
+ a_version_str = get_package_version(self.mount_a.client_remote, "fuse")
+ b_version_str = get_package_version(self.mount_b.client_remote, "fuse")
+ flock_version_str = "2.9"
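+ # flock support in the fuse client requires fuse >= 2.9 on both clients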
+
+ version_regex = re.compile(r"[0-9\.]+")
+ a_result = version_regex.match(a_version_str)
+ self.assertTrue(a_result)
+ b_result = version_regex.match(b_version_str)
+ self.assertTrue(b_result)
+ a_version = version.StrictVersion(a_result.group())
+ b_version = version.StrictVersion(b_result.group())
+ flock_version = version.StrictVersion(flock_version_str)
+
+ if (a_version >= flock_version and b_version >= flock_version):
+ log.info("flock locks are available")
+ return True
+ else:
+ log.info("not testing flock locks, machines have versions {av} and {bv}".format(
+ av=a_version_str,bv=b_version_str))
+ return False
+
+ def test_filelock(self):
+ """
+ Check that file lock doesn't get lost after an MDS restart
+ """
+
+ flockable = self._is_flockable()
+ lock_holder = self.mount_a.lock_background(do_flock=flockable)
+
+ self.mount_b.wait_for_visible("background_file-2")
+ self.mount_b.check_filelock(do_flock=flockable)
+
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE)
+
+ self.mount_b.check_filelock(do_flock=flockable)
+
+ self.mount_a._kill_background(lock_holder)
+
+ def test_filelock_eviction(self):
+ """
+ Check that file lock held by evicted client is given to
+ waiting client.
+ """
+ if not self._is_flockable():
+ self.skipTest("flock is not available")
+
+ lock_holder = self.mount_a.lock_background()
+ self.mount_b.wait_for_visible("background_file-2")
+ self.mount_b.check_filelock()
+
+ lock_taker = self.mount_b.lock_and_release()
+ # Check the taker is waiting (doesn't get it immediately)
+ time.sleep(2)
+ self.assertFalse(lock_holder.finished)
+ self.assertFalse(lock_taker.finished)
+
+ try:
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ # Evicting mount_a should let mount_b's attempt to take the lock
+ # succeed
+ self.wait_until_true(lambda: lock_taker.finished, timeout=10)
+ finally:
+ self.mount_a._kill_background(lock_holder)
+
+ # teardown() doesn't quite handle this case cleanly, so help it out
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Bring the client back
+ self.mount_a.mount_wait()
+
+ def test_dir_fsync(self):
+ self._test_fsync(True)
+
+ def test_create_fsync(self):
+ self._test_fsync(False)
+
+ def _test_fsync(self, dirfsync):
+ """
+ That calls to fsync guarantee visibility of metadata to another
+ client immediately after the fsyncing client dies.
+ """
+
+ # Leave this guy out until he's needed
+ self.mount_b.umount_wait()
+
+ # Create dir + child dentry on client A, and fsync the dir
+ path = os.path.join(self.mount_a.mountpoint, "subdir")
+ self.mount_a.run_python(
+ dedent("""
+ import os
+ import time
+
+ path = "{path}"
+
+ print("Starting creation...")
+ start = time.time()
+
+ os.mkdir(path)
+ dfd = os.open(path, os.O_DIRECTORY)
+
+ fd = open(os.path.join(path, "childfile"), "w")
+ print("Finished creation in {{0}}s".format(time.time() - start))
+
+ print("Starting fsync...")
+ start = time.time()
+ if {dirfsync}:
+ os.fsync(dfd)
+ else:
+ os.fsync(fd)
+ print("Finished fsync in {{0}}s".format(time.time() - start))
+ """.format(path=path,dirfsync=str(dirfsync)))
+ )
+
+ # Immediately kill the MDS and then client A
+ self.fs.fail()
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay
+ self.fs.set_joinable()
+ log.info("Waiting for reconnect...")
+ self.fs.wait_for_state("up:reconnect")
+ log.info("Waiting for active...")
+ self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout)
+ log.info("Reached active...")
+
+ # Is the child dentry visible from mount B?
+ self.mount_b.mount_wait()
+ self.mount_b.run_shell(["ls", "subdir/childfile"])
+
+ def test_unmount_for_evicted_client(self):
+ """Test if client hangs on unmount after evicting the client."""
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ self.mount_a.umount_wait(require_clean=True, timeout=30)
+
+ def test_mount_after_evicted_client(self):
+ """Test if a new mount of same fs works after client eviction."""
+
+ # trash this: we need it to use the same remote as mount_a
+ self.mount_b.umount_wait()
+
+ cl = self.mount_a.__class__
+
+ # create a new instance of mount_a's class with most of the
+ # same settings, but mounted on mount_b's mountpoint.
+ m = cl(ctx=self.mount_a.ctx,
+ client_config=self.mount_a.client_config,
+ test_dir=self.mount_a.test_dir,
+ client_id=self.mount_a.client_id,
+ client_remote=self.mount_a.client_remote,
+ client_keyring_path=self.mount_a.client_keyring_path,
+ cephfs_name=self.mount_a.cephfs_name,
+ cephfs_mntpt= self.mount_a.cephfs_mntpt,
+ hostfs_mntpt=self.mount_b.hostfs_mntpt,
+ brxnet=self.mount_a.ceph_brx_net)
+
+ # evict mount_a
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ m.mount_wait()
+ m.create_files()
+ m.check_files()
+ m.umount_wait(require_clean=True)
+
+ def test_stale_renew(self):
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to handle signal STOP/CONT")
+
+ session_timeout = self.fs.get_var("session_timeout")
+
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["touch", "testdir/file1"])
+ # populate readdir cache
+ self.mount_a.run_shell(["ls", "testdir"])
+ self.mount_b.run_shell(["ls", "testdir"])
+
+ # check if readdir cache is effective
+ initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
+ self.mount_b.run_shell(["ls", "testdir"])
+ current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency'])
+        self.assertEqual(current_readdirs, initial_readdirs)
+
+ mount_b_gid = self.mount_b.get_global_id()
+        # make mount_b unresponsive by suspending its network namespace
+ self.mount_b.suspend_netns()
+
+ self.assert_session_state(mount_b_gid, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+
+ self.mount_a.run_shell(["touch", "testdir/file2"])
+ self.assert_session_state(mount_b_gid, "stale")
+
+        # restore mount_b's network so it can renew its session
+ self.mount_b.resume_netns()
+        # Is the new file visible from mount_b? (caps become invalid after the session goes stale)
+ self.mount_b.run_shell(["ls", "testdir/file2"])
+
+ def test_abort_conn(self):
+ """
+ Check that abort_conn() skips closing mds sessions.
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Testing libcephfs function")
+
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+ session_timeout = self.fs.get_var("session_timeout")
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ gid_str = self.mount_a.run_python(dedent("""
+ import cephfs as libcephfs
+ cephfs = libcephfs.LibCephFS(conffile='')
+ cephfs.mount()
+ client_id = cephfs.get_instance_id()
+ cephfs.abort_conn()
+ print(client_id)
+ """)
+ )
+        gid = int(gid_str)
+
+ self.assert_session_state(gid, "open")
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
+ self.assert_session_state(gid, "stale")
+
+ def test_dont_mark_unresponsive_client_stale(self):
+ """
+ Test that an unresponsive client holding caps is not marked stale or
+        evicted unless another client wants its caps.
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to handle signal STOP/CONT")
+
+ # XXX: To conduct this test we need at least two clients since a
+        # single client is never evicted by the MDS.
+ SESSION_TIMEOUT = 30
+ SESSION_AUTOCLOSE = 50
+ time_at_beg = time.time()
+ mount_a_gid = self.mount_a.get_global_id()
+ _ = self.mount_a.client_pid
+ self.fs.set_var('session_timeout', SESSION_TIMEOUT)
+ self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE)
+ self.assert_session_count(2, self.fs.mds_asok(['session', 'ls']))
+
+        # test that a client holding caps not required by any other client is not
+ # marked stale when it becomes unresponsive.
+ self.mount_a.run_shell(['mkdir', 'dir'])
+ self.mount_a.send_signal('sigstop')
+ time.sleep(SESSION_TIMEOUT + 2)
+ self.assert_session_state(mount_a_gid, "open")
+
+ # test that other clients have to wait to get the caps from
+        # the unresponsive client until session_autoclose.
+ self.mount_b.run_shell(['stat', 'dir'])
+ self.assert_session_count(1, self.fs.mds_asok(['session', 'ls']))
+ self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE)
+
+ self.mount_a.send_signal('sigcont')
+
+ def test_config_session_timeout(self):
+ self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+ session_timeout = self.fs.get_var("session_timeout")
+ mount_a_gid = self.mount_a.get_global_id()
+
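+        # Per-session override: this session should survive past the global
+        # session_timeout and only be closed after the doubled timeout set here.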
+ self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)])
+
+        self.mount_a.kill()
+
+ self.assert_session_count(2)
+
+ time.sleep(session_timeout * 1.5)
+ self.assert_session_state(mount_a_gid, "open")
+
+ time.sleep(session_timeout)
+ self.assert_session_count(1)
+
+ self.mount_a.kill_cleanup()
+
+ def test_reconnect_after_blocklisted(self):
+ """
+ Test reconnect after blocklisted.
+ - writing to a fd that was opened before blocklist should return -EBADF
+ - reading/writing to a file with lost file locks should return -EIO
+ - readonly fd should continue to work
+ """
+
+ self.mount_a.umount_wait()
+
+ if isinstance(self.mount_a, FuseMount):
+ self.mount_a.mount_wait(mntargs=['--client_reconnect_stale=1', '--fuse_disable_pagecache=1'])
+ else:
+ try:
+ self.mount_a.mount_wait(mntopts=['recover_session=clean'])
+ except CommandFailedError:
+ self.mount_a.kill_cleanup()
+ self.skipTest("Not implemented in current kernel")
+
+ self.mount_a.wait_until_mounted()
+
+ path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blocklisted')
+ pyscript = dedent("""
+ import os
+ import sys
+ import fcntl
+ import errno
+ import time
+
+ fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0O666)
+ fd2 = os.open("{path}.1", os.O_RDONLY)
+ fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0O666)
+ fd4 = os.open("{path}.2", os.O_RDONLY)
+
+ os.write(fd1, b'content')
+            os.read(fd2, 1)
+
+            os.write(fd3, b'content')
+            os.read(fd4, 1)
+            fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB)
+
+ print("blocklist")
+ sys.stdout.flush()
+
+ sys.stdin.readline()
+
+ # wait for mds to close session
+            time.sleep(10)
+
+ # trigger 'open session' message. kclient relies on 'session reject' message
+            # to detect whether it has been blocklisted
+ try:
+ os.stat("{path}.1")
+ except:
+ pass
+
+ # wait for auto reconnect
+            time.sleep(10)
+
+ try:
+ os.write(fd1, b'content')
+ except OSError as e:
+ if e.errno != errno.EBADF:
+ raise
+ else:
+ raise RuntimeError("write() failed to raise error")
+
+            os.read(fd2, 1)
+
+ try:
+ os.read(fd4, 1)
+ except OSError as e:
+ if e.errno != errno.EIO:
+ raise
+ else:
+ raise RuntimeError("read() failed to raise error")
+ """).format(path=path)
+ rproc = self.mount_a.client_remote.run(
+ args=['python3', '-c', pyscript],
+ wait=False, stdin=run.PIPE, stdout=run.PIPE)
+
+ rproc.stdout.readline()
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+
+ rproc.stdin.writelines(['done\n'])
+ rproc.stdin.flush()
+
+ rproc.wait()
+ self.assertEqual(rproc.exitstatus, 0)
+
+ def test_refuse_client_session(self):
+ """
+ Test that client cannot start session when file system flag
+ refuse_client_session is set
+ """
+
+ self.mount_a.umount_wait()
+ self.fs.set_refuse_client_session(True)
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.mount_wait()
+
+ def test_refuse_client_session_on_reconnect(self):
+ """
+ Test that client cannot reconnect when filesystem comes online and
+ file system flag refuse_client_session is set
+ """
+
+ self.mount_a.create_files()
+ self.mount_a.check_files()
+
+ self.fs.fail()
+ self.fs.set_refuse_client_session(True)
+ self.fs.set_joinable()
+ with self.assert_cluster_log('client could not reconnect as'
+ ' file system flag'
+ ' refuse_client_session is set'):
+ time.sleep(self.fs.get_var("session_timeout") * 1.5)
+ self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0)
+ self.mount_a.umount_wait(force=True)
+
diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
new file mode 100644
index 000000000..bfaa23453
--- /dev/null
+++ b/qa/tasks/cephfs/test_damage.py
@@ -0,0 +1,663 @@
+from io import BytesIO, StringIO
+import json
+import logging
+import errno
+import re
+import time
+from teuthology.contextutil import MaxWhileTries
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra.run import wait
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+DAMAGED_ON_START = "damaged_on_start"
+DAMAGED_ON_LS = "damaged_on_ls"
+CRASHED = "server crashed"
+NO_DAMAGE = "no damage"
+READONLY = "readonly"
+FAILED_CLIENT = "client failed"
+FAILED_SERVER = "server failed"
+
+# An EIO in response to a stat from the client
+EIO_ON_LS = "eio"
+
+# An EIO, but nothing in the damage table (never what we expect)
+EIO_NO_DAMAGE = "eio without damage entry"
+
+
+log = logging.getLogger(__name__)
+
+
+class TestDamage(CephFSTestCase):
+ def _simple_workload_write(self):
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.write_n_mb("subdir/sixmegs", 6)
+ return self.mount_a.stat("subdir/sixmegs")
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+    @for_teuthology  # 459s
+ def test_object_deletion(self):
+ """
+ That the MDS has a clean 'damaged' response to loss of any single metadata object
+ """
+
+ self._simple_workload_write()
+
+ # Hmm, actually it would be nice to permute whether the metadata pool
+ # state contains sessions or not, but for the moment close this session
+ # to avoid waiting through reconnect on every MDS start.
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.fail()
+
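+        # Keep a full export of the metadata pool so each mutation below can be
+        # applied to a pristine copy (re-imported at the top of the loop)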
+ serialized = self.fs.radosmo(['export', '-'])
+
+ def is_ignored(obj_id, dentry=None):
+ """
+ A filter to avoid redundantly mutating many similar objects (e.g.
+ stray dirfrags) or similar dentries (e.g. stray dir dentries)
+ """
+ if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000":
+ return True
+
+ if dentry and obj_id == "100.00000000":
+ if re.match("stray.+_head", dentry) and dentry != "stray0_head":
+ return True
+
+ return False
+
+ def get_path(obj_id, dentry=None):
+ """
+ What filesystem path does this object or dentry correspond to? i.e.
+ what should I poke to see EIO after damaging it?
+ """
+
+ if obj_id == "1.00000000" and dentry == "subdir_head":
+ return "./subdir"
+ elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head":
+ return "./subdir/sixmegs"
+
+ # None means ls will do an "ls -R" in hope of seeing some errors
+ return None
+
+ objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n")
+ objects = [o for o in objects if not is_ignored(o)]
+
+ # Find all objects with an OMAP header
+ omap_header_objs = []
+ for o in objects:
+ header = self.fs.radosmo(["getomapheader", o], stdout=StringIO())
+ # The rados CLI wraps the header output in a hex-printed style
+ header_bytes = int(re.match("header \((.+) bytes\)", header).group(1))
+ if header_bytes > 0:
+ omap_header_objs.append(o)
+
+ # Find all OMAP key/vals
+ omap_keys = []
+ for o in objects:
+ keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO())
+ if keys_str:
+ for key in keys_str.strip().split("\n"):
+ if not is_ignored(o, key):
+ omap_keys.append((o, key))
+
+ # Find objects that have data in their bodies
+ data_objects = []
+ for obj_id in objects:
+ stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO())
+ size = int(re.match(".+, size (.+)$", stat_out).group(1))
+ if size > 0:
+ data_objects.append(obj_id)
+
+ # Define the various forms of damage we will inflict
+ class MetadataMutation(object):
+ def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None):
+ self.obj_id = obj_id_
+ self.desc = desc_
+ self.mutate_fn = mutate_fn_
+ self.expectation = expectation_
+ if ls_path is None:
+ self.ls_path = "."
+ else:
+ self.ls_path = ls_path
+
+ def __eq__(self, other):
+ return self.desc == other.desc
+
+ def __hash__(self):
+ return hash(self.desc)
+
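+        # Arbitrary garbage used as the payload for the corruption mutations below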
+ junk = "deadbeef" * 10
+ mutations = []
+
+ # Removals
+ for o in objects:
+ if o in [
+ # JournalPointers are auto-replaced if missing (same path as upgrade)
+ "400.00000000",
+ # Missing dirfrags for non-system dirs result in empty directory
+ "10000000000.00000000",
+ # PurgeQueue is auto-created if not found on startup
+ "500.00000000",
+ # open file table is auto-created if not found on startup
+ "mds0_openfiles.0"
+ ]:
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ log.info("Expectation on rm '{0}' will be '{1}'".format(
+ o, expectation
+ ))
+
+ mutations.append(MetadataMutation(
+ o,
+ "Delete {0}".format(o),
+ lambda o=o: self.fs.radosm(["rm", o]),
+ expectation
+ ))
+
+ # Blatant corruptions
+ for obj_id in data_objects:
+ if obj_id == "500.00000000":
+ # purge queue corruption results in read-only FS
+ mutations.append(MetadataMutation(
+ obj_id,
+ "Corrupt {0}".format(obj_id),
+ lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
+ READONLY
+ ))
+ else:
+ mutations.append(MetadataMutation(
+ obj_id,
+ "Corrupt {0}".format(obj_id),
+ lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)),
+ DAMAGED_ON_START
+ ))
+
+ # Truncations
+ for o in data_objects:
+ if o == "500.00000000":
+ # The PurgeQueue is allowed to be empty: Journaler interprets
+ # an empty header object as an empty journal.
+ expectation = NO_DAMAGE
+ else:
+ expectation = DAMAGED_ON_START
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Truncate {0}".format(o),
+ lambda o=o: self.fs.radosm(["truncate", o, "0"]),
+ expectation
+ ))
+
+ # OMAP value corruptions
+ for o, k in omap_keys:
+ if o.startswith("100."):
+ # Anything in rank 0's 'mydir'
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = EIO_ON_LS
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Corrupt omap key {0}:{1}".format(o, k),
+ lambda o=o,k=k: self.fs.radosm(["setomapval", o, k, junk]),
+ expectation,
+ get_path(o, k)
+ )
+ )
+
+ # OMAP header corruptions
+ for o in omap_header_objs:
+ if re.match("60.\.00000000", o) \
+ or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]:
+ expectation = DAMAGED_ON_START
+ else:
+ expectation = NO_DAMAGE
+
+ log.info("Expectation on corrupt header '{0}' will be '{1}'".format(
+ o, expectation
+ ))
+
+ mutations.append(
+ MetadataMutation(
+ o,
+ "Corrupt omap header on {0}".format(o),
+ lambda o=o: self.fs.radosm(["setomapheader", o, junk]),
+ expectation
+ )
+ )
+
+ results = {}
+
+ for mutation in mutations:
+ log.info("Applying mutation '{0}'".format(mutation.desc))
+
+ # Reset MDS state
+ self.mount_a.umount_wait(force=True)
+ self.fs.fail()
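+            # clear any 'damaged' flag left by the previous iteration so rank 0
+            # is allowed to start again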
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Reset RADOS pool state
+ self.fs.radosm(['import', '-'], stdin=BytesIO(serialized))
+
+ # Inject the mutation
+ mutation.mutate_fn()
+
+ # Try starting the MDS
+ self.fs.set_joinable()
+
+ # How long we'll wait between starting a daemon and expecting
+ # it to make it through startup, and potentially declare itself
+ # damaged to the mon cluster.
+ startup_timeout = 60
+
+ if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE):
+ if mutation.expectation == DAMAGED_ON_START:
+ # The MDS may pass through active before making it to damaged
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout)
+ except RuntimeError:
+ pass
+
+ # Wait for MDS to either come up or go into damaged state
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout)
+ except RuntimeError:
+ crashed = False
+ # Didn't make it to healthy or damaged, did it crash?
+ for daemon_id, daemon in self.fs.mds_daemons.items():
+ if daemon.proc and daemon.proc.finished:
+ crashed = True
+ log.error("Daemon {0} crashed!".format(daemon_id))
+ daemon.proc = None # So that subsequent stop() doesn't raise error
+ if not crashed:
+                        # Didn't go healthy, didn't go damaged, didn't crash, so what?
+ raise
+ else:
+ log.info("Result: Mutation '{0}' led to crash".format(mutation.desc))
+ results[mutation] = CRASHED
+ continue
+ if self.is_marked_damaged(0):
+ log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_START
+ continue
+ else:
+ log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc))
+ else:
+ try:
+ self.wait_until_true(self.fs.are_daemons_healthy, 60)
+ except RuntimeError:
+ log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc))
+ if self.is_marked_damaged(0):
+ results[mutation] = DAMAGED_ON_START
+ else:
+ results[mutation] = FAILED_SERVER
+ continue
+ log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc))
+
+ # MDS is up, should go damaged on ls or client mount
+ self.mount_a.mount_wait()
+ if mutation.ls_path == ".":
+ proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False)
+ else:
+ proc = self.mount_a.stat(mutation.ls_path, wait=False)
+
+ if mutation.expectation == DAMAGED_ON_LS:
+ try:
+ self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
+ log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc))
+ results[mutation] = DAMAGED_ON_LS
+ except RuntimeError:
+ if self.fs.are_daemons_healthy():
+ log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format(
+ mutation.desc))
+ results[mutation] = NO_DAMAGE
+ else:
+ log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_SERVER
+ elif mutation.expectation == READONLY:
+ proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False)
+ try:
+ proc.wait()
+ except CommandFailedError:
+ stderr = proc.stderr.getvalue()
+ log.info(stderr)
+ if "Read-only file system".lower() in stderr.lower():
+ pass
+ else:
+ raise
+ else:
+ try:
+ wait([proc], 20)
+ log.info("Result: Mutation '{0}' did not caused DAMAGED state".format(mutation.desc))
+ results[mutation] = NO_DAMAGE
+ except MaxWhileTries:
+ log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc))
+ results[mutation] = FAILED_CLIENT
+ except CommandFailedError as e:
+ if e.exitstatus == errno.EIO:
+ log.info("Result: EIO on client")
+ results[mutation] = EIO_ON_LS
+ else:
+ log.info("Result: unexpected error {0} on client".format(e))
+ results[mutation] = FAILED_CLIENT
+
+ if mutation.expectation == EIO_ON_LS:
+ # EIOs mean something handled by DamageTable: assert that it has
+ # been populated
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
+ if len(damage) == 0:
+ results[mutation] = EIO_NO_DAMAGE
+
+ failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result]
+ if failures:
+ log.error("{0} mutations had unexpected outcomes:".format(len(failures)))
+ for mutation, result in failures:
+ log.error(" Expected '{0}' actually '{1}' from '{2}'".format(
+ mutation.expectation, result, mutation.desc
+ ))
+ raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures)))
+ else:
+ log.info("All {0} mutations had expected outcomes".format(len(mutations)))
+
+ def test_damaged_dentry(self):
+        # Damage to dentries is interesting because it leaves the
+ # directory's `complete` flag in a subtle state where
+ # we have marked the dir complete in order that folks
+ # can access it, but in actual fact there is a dentry
+ # missing
+ self.mount_a.run_shell(["mkdir", "subdir/"])
+
+ self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
+ self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])
+
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+
+ self.mount_a.umount_wait()
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.fail()
+
+ # Corrupt a dentry
+ junk = "deadbeef" * 10
+ dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
+ self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Start up and try to list it
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+ dentries = self.mount_a.ls("subdir/")
+
+ # The damaged guy should have disappeared
+ self.assertEqual(dentries, ["file_undamaged"])
+
+ # I should get ENOENT if I try and read it normally, because
+ # the dir is considered complete
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected ENOENT")
+
+        # The fact that there is damage should have been recorded
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ damage_id = damage[0]['id']
+
+ # If I try to create a dentry with the same name as the damaged guy
+ # then that should be forbidden
+ try:
+ self.mount_a.touch("subdir/file_to_be_damaged")
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ raise AssertionError("Expected EIO")
+
+ # Attempting that touch will clear the client's complete flag, now
+ # when I stat it I'll get EIO instead of ENOENT
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ if isinstance(self.mount_a, FuseMount):
+ self.assertEqual(e.exitstatus, errno.EIO)
+ else:
+ # Old kernel client handles this case differently
+ self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO])
+ else:
+ raise AssertionError("Expected EIO")
+
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "2")
+
+ self.mount_a.umount_wait()
+
+ # Now repair the stats
+ scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"])
+ log.info(json.dumps(scrub_json, indent=2))
+
+ self.assertNotEqual(scrub_json, None)
+ self.assertEqual(scrub_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True)
+
+ # Check that the file count is now correct
+ self.mount_a.mount_wait()
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "1")
+
+ # Clean up the omap object
+ self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Clean up the damagetable entry
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", "{did}".format(did=damage_id))
+
+ # Now I should be able to create a file with the same name as the
+ # damaged guy if I want.
+ self.mount_a.touch("subdir/file_to_be_damaged")
+
+ def test_open_ino_errors(self):
+ """
+ That errors encountered during opening inos are properly propagated
+ """
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ self.mount_a.run_shell(["mkdir", "dir2"])
+ self.mount_a.run_shell(["touch", "dir2/file2"])
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"])
+ self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"])
+
+ file1_ino = self.mount_a.path_to_ino("dir1/file1")
+ file2_ino = self.mount_a.path_to_ino("dir2/file2")
+ dir2_ino = self.mount_a.path_to_ino("dir2")
+
+ # Ensure everything is written to backing store
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Drop everything from the MDS cache
+ self.fs.fail()
+ self.fs.journal_tool(['journal', 'reset'], 0)
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+
+ # Case 1: un-decodeable backtrace
+
+ # Validate that the backtrace is present and decodable
+ self.fs.read_backtrace(file1_ino)
+ # Go corrupt the backtrace of alpha/target (used for resolving
+ # bravo/hardlink).
+ self.fs._write_data_xattr(file1_ino, "parent", "rhubarb")
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertEqual(damage[0]['damage_type'], "backtrace")
+ self.assertEqual(damage[0]['ino'], file1_ino)
+
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(damage[0]['id']))
+
+
+ # Case 2: missing dirfrag for the target inode
+
+ self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)])
+
+ # Check that touching the hardlink gives EIO
+ ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False)
+ try:
+ ran.wait()
+ except CommandFailedError:
+ self.assertTrue("Input/output error" in ran.stderr.getvalue())
+
+ # Check that an entry is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 2)
+ if damage[0]['damage_type'] == "backtrace" :
+ self.assertEqual(damage[0]['ino'], file2_ino)
+ self.assertEqual(damage[1]['damage_type'], "dir_frag")
+ self.assertEqual(damage[1]['ino'], dir2_ino)
+ else:
+ self.assertEqual(damage[0]['damage_type'], "dir_frag")
+ self.assertEqual(damage[0]['ino'], dir2_ino)
+ self.assertEqual(damage[1]['damage_type'], "backtrace")
+ self.assertEqual(damage[1]['ino'], file2_ino)
+
+ for entry in damage:
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "rm", str(entry['id']))
+
+ def test_dentry_first_existing(self):
+ """
+ That the MDS won't abort when the dentry is already known to be damaged.
+ """
+
+ def verify_corrupt():
+ info = self.fs.read_cache("/a", 0)
+ log.debug('%s', info)
+ self.assertEqual(len(info), 1)
+ dirfrags = info[0]['dirfrags']
+ self.assertEqual(len(dirfrags), 1)
+ dentries = dirfrags[0]['dentries']
+ self.assertEqual([dn['path'] for dn in dentries if dn['is_primary']], ['a/c'])
+ self.assertEqual(dentries[0]['snap_first'], 18446744073709551606) # SNAP_HEAD
+
+ self.mount_a.run_shell_payload("mkdir -p a/b")
+ self.fs.flush()
+ self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False)
+ self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
+ time.sleep(5) # for conf to percolate
+ self.mount_a.run_shell_payload("mv a/b a/c; sync .")
+ self.mount_a.umount()
+ verify_corrupt()
+ self.fs.fail()
+ self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")
+ self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False)
+ self.fs.set_joinable()
+ status = self.fs.status()
+ self.fs.flush()
+ self.assertFalse(self.fs.status().hadfailover(status))
+ verify_corrupt()
+
+ def test_dentry_first_preflush(self):
+ """
+ That the MDS won't write a dentry with new damage to CDentry::first
+ to the journal.
+ """
+
+ rank0 = self.fs.get_rank()
+ self.fs.rank_freeze(True, rank=0)
+ self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d")
+ self.fs.flush()
+ self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
+ time.sleep(5) # for conf to percolate
+ with self.assert_cluster_log("MDS abort because newly corrupt dentry"):
+ p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
+ self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")
+ self.fs.rank_freeze(False, rank=0)
+ self.delete_mds_coredump(rank0['name'])
+ self.fs.mds_restart(rank0['name'])
+ self.fs.wait_for_daemons()
+ p.wait()
+ self.mount_a.run_shell_payload("stat a/ && find a/")
+ self.fs.flush()
+
+ def test_dentry_first_precommit(self):
+ """
+ That the MDS won't write a dentry with new damage to CDentry::first
+ to the directory object.
+ """
+
+ fscid = self.fs.id
+ self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d; sync .")
+ self.mount_a.umount() # allow immediate scatter write back
+ self.fs.flush()
+ # now just twiddle some inode metadata on a regular file
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell_payload("chmod 711 a/b/d; sync .")
+ self.mount_a.umount() # avoid journaling session related things
+ # okay, now cause the dentry to get damaged after loading from the journal
+ self.fs.fail()
+ self.config_set("mds", "mds_inject_journal_corrupt_dentry_first", "1.0")
+ time.sleep(5) # for conf to percolate
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ rank0 = self.fs.get_rank()
+ self.fs.rank_freeze(True, rank=0)
+ # so now we want to trigger commit but this will crash, so:
+ with self.assert_cluster_log("MDS abort because newly corrupt dentry"):
+ c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"]
+ p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout)
+ self.config_rm("mds", "mds_inject_journal_corrupt_dentry_first")
+ self.fs.rank_freeze(False, rank=0)
+ self.delete_mds_coredump(rank0['name'])
+ self.fs.mds_restart(rank0['name'])
+ self.fs.wait_for_daemons()
+ try:
+ p.wait()
+ except CommandFailedError as e:
+ print(e)
+ else:
+ self.fail("flush journal should fail!")
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell_payload("stat a/ && find a/")
+ self.fs.flush()
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
new file mode 100644
index 000000000..9a93bd622
--- /dev/null
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -0,0 +1,796 @@
+
+"""
+Test our tools for recovering metadata from the data pool
+"""
+import json
+
+import logging
+import os
+import time
+import traceback
+import stat
+
+from io import BytesIO, StringIO
+from collections import namedtuple, defaultdict
+from textwrap import dedent
+
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class Workload(object):
+ def __init__(self, filesystem, mount):
+ self._mount = mount
+ self._filesystem = filesystem
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def assert_equal(self, a, b):
+ try:
+ if a != b:
+ raise AssertionError("{0} != {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def assert_not_equal(self, a, b):
+ try:
+ if a == b:
+ raise AssertionError("{0} == {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def assert_true(self, a):
+ try:
+ if not a:
+ raise AssertionError("{0} is not true".format(a))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+ # Delete every object in the metadata pool
+ pool = self._filesystem.get_metadata_pool_name()
+ self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._filesystem.mds_asok(["flush", "journal"])
+
+ def scrub(self):
+ """
+        Called as a final step post recovery before verification. Right now, this
+        doesn't check whether scrub finds errors - only that the MDS doesn't
+        crash and burn during the scrub.
+ """
+ out_json = self._filesystem.run_scrub(["start", "/", "repair,recursive"])
+ self.assert_not_equal(out_json, None)
+ self.assert_equal(out_json["return_code"], 0)
+ self.assert_equal(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+class SimpleWorkload(Workload):
+ """
+ Single file, single directory, check that it gets recovered and so does its size
+ """
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def validate(self):
+ self._mount.run_shell(["sudo", "ls", "subdir"], omit_sudo=False)
+ st = self._mount.stat("subdir/sixmegs", sudo=True)
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+
+class SymlinkWorkload(Workload):
+ """
+ Symlink file, check that it gets recovered as symlink
+ """
+ def write(self):
+ self._mount.run_shell(["mkdir", "symdir"])
+ self._mount.write_n_mb("symdir/onemegs", 1)
+ self._mount.run_shell(["ln", "-s", "onemegs", "symdir/symlink_onemegs"])
+ self._mount.run_shell(["ln", "-s", "symdir/onemegs", "symlink1_onemegs"])
+
+ def validate(self):
+ self._mount.run_shell(["sudo", "ls", "symdir"], omit_sudo=False)
+ st = self._mount.lstat("symdir/symlink_onemegs")
+ self.assert_true(stat.S_ISLNK(st['st_mode']))
+ target = self._mount.readlink("symdir/symlink_onemegs")
+ self.assert_equal(target, "onemegs")
+
+ st = self._mount.lstat("symlink1_onemegs")
+ self.assert_true(stat.S_ISLNK(st['st_mode']))
+ target = self._mount.readlink("symlink1_onemegs")
+ self.assert_equal(target, "symdir/onemegs")
+ return self._errors
+
+
+class MovedFile(Workload):
+ def write(self):
+        # Create a file whose backtrace disagrees with its eventual position
+        # in the metadata. We will see that it gets reconstructed in its
+        # original position according to its backtrace.
+ self._mount.run_shell(["mkdir", "subdir_alpha"])
+ self._mount.run_shell(["mkdir", "subdir_bravo"])
+ self._mount.write_n_mb("subdir_alpha/sixmegs", 6)
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"])
+ self._initial_state = self._mount.stat("subdir_bravo/sixmegs")
+
+ def flush(self):
+ pass
+
+ def validate(self):
+ self.assert_equal(self._mount.ls(sudo=True), ["subdir_alpha"])
+ st = self._mount.stat("subdir_alpha/sixmegs", sudo=True)
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+
+class BacktracelessFile(Workload):
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def flush(self):
+ # Never flush metadata, so backtrace won't be written
+ pass
+
+ def validate(self):
+ ino_name = "%x" % self._initial_state["st_ino"]
+
+ # The inode should be linked into lost+found because we had no path for it
+ self.assert_equal(self._mount.ls(sudo=True), ["lost+found"])
+ self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name])
+ st = self._mount.stat(f"lost+found/{ino_name}", sudo=True)
+
+ # We might not have got the name or path, but we should still get the size
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+ # remove the entry from lost+found directory
+ self._mount.run_shell(["sudo", "rm", "-f", f'lost+found/{ino_name}'], omit_sudo=False)
+ self.assert_equal(self._mount.ls("lost+found", sudo=True), [])
+
+ return self._errors
+
+
+class StripedStashedLayout(Workload):
+ def __init__(self, fs, m, pool=None):
+ super(StripedStashedLayout, self).__init__(fs, m)
+
+ # Nice small stripes so we can quickly do our writes+validates
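+        # sc/ss/os are the stripe_count, stripe_unit and object_size (bytes)
+        # that get plugged into the ceph.dir.layout vxattr below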
+ self.sc = 4
+ self.ss = 65536
+ self.os = 262144
+        self.pool = pool or self._filesystem.get_data_pool_name()
+
+ self.interesting_sizes = [
+ # Exactly stripe_count objects will exist
+ self.os * self.sc,
+ # Fewer than stripe_count objects will exist
+ self.os * self.sc // 2,
+ self.os * (self.sc - 1) + self.os // 2,
+ self.os * (self.sc - 1) + self.os // 2 - 1,
+ self.os * (self.sc + 1) + self.os // 2,
+ self.os * (self.sc + 1) + self.os // 2 + 1,
+ # More than stripe_count objects will exist
+ self.os * self.sc + self.os * self.sc // 2
+ ]
+
+ def write(self):
+ # Create a dir with a striped layout set on it
+ self._mount.run_shell(["mkdir", "stripey"])
+
+ self._mount.setfattr("./stripey", "ceph.dir.layout",
+ "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format(
+ ss=self.ss, os=self.os, sc=self.sc, pool=self.pool
+ ))
+
+ # Write files, then flush metadata so that its layout gets written into an xattr
+ for i, n_bytes in enumerate(self.interesting_sizes):
+ self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ # This is really just validating the validator
+ self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ self._filesystem.mds_asok(["flush", "journal"])
+
+ # Write another file in the same way, but this time don't flush the metadata,
+ # so that it won't have the layout xattr
+ self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512)
+ self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512)
+
+ self._initial_state = {
+ "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file")
+ }
+
+ def flush(self):
+ # Pass because we already selectively flushed during write
+ pass
+
+ def validate(self):
+        # The flushed files should have been recovered into their original
+        # locations with the correct layout: read back correct data
+ for i, n_bytes in enumerate(self.interesting_sizes):
+ try:
+ self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes)
+ except CommandFailedError as e:
+ self._errors.append(
+ ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3))
+ )
+
+ # The unflushed file should have been recovered into lost+found without
+ # the correct layout: read back junk
+ ino_name = "%x" % self._initial_state["unflushed_ino"]
+ self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name])
+ try:
+ self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512)
+ except CommandFailedError:
+ pass
+ else:
+ self._errors.append(
+ ValidationError("Unexpectedly valid data in unflushed striped file", "")
+ )
+
+ return self._errors
+
+
+class ManyFilesWorkload(Workload):
+ def __init__(self, filesystem, mount, file_count):
+ super(ManyFilesWorkload, self).__init__(filesystem, mount)
+ self.file_count = file_count
+
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ for n in range(0, self.file_count):
+ self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
+
+ def validate(self):
+ for n in range(0, self.file_count):
+ try:
+ self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024)
+ except CommandFailedError as e:
+ self._errors.append(
+ ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3))
+ )
+
+ return self._errors
+
+
+class MovedDir(Workload):
+ def write(self):
+ # Create a nested dir that we will then move. Two files with two different
+ # backtraces referring to the moved dir, claiming two different locations for
+ # it. We will see that only one backtrace wins and the dir ends up with
+ # single linkage.
+ self._mount.run_shell(["mkdir", "-p", "grandmother/parent"])
+ self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1)
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._mount.run_shell(["mkdir", "grandfather"])
+ self._mount.run_shell(["mv", "grandmother/parent", "grandfather"])
+ self._mount.write_n_mb("grandfather/parent/new_pos_file", 2)
+ self._filesystem.mds_asok(["flush", "journal"])
+
+ self._initial_state = (
+ self._mount.stat("grandfather/parent/orig_pos_file"),
+ self._mount.stat("grandfather/parent/new_pos_file")
+ )
+
+ def validate(self):
+ root_files = self._mount.ls()
+ self.assert_equal(len(root_files), 1)
+ self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True)
+ winner = root_files[0]
+ st_opf = self._mount.stat(f"{winner}/parent/orig_pos_file", sudo=True)
+ st_npf = self._mount.stat(f"{winner}/parent/new_pos_file", sudo=True)
+
+ self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size'])
+ self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size'])
+
+
+class MissingZerothObject(Workload):
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._mount.stat("subdir/sixmegs")
+
+ def damage(self):
+ super(MissingZerothObject, self).damage()
+ zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino'])
+ self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name())
+
+ def validate(self):
+ ino = self._initial_state['st_ino']
+ st = self._mount.stat(f"lost+found/{ino:x}", sudo=True)
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+
+class NonDefaultLayout(Workload):
+ """
+ Check that the reconstruction copes with files that have a different
+ object size in their layout
+ """
+ def write(self):
+ self._mount.run_shell(["touch", "datafile"])
+ self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608")
+ self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"])
+ self._initial_state = self._mount.stat("datafile")
+
+ def validate(self):
+ # Check we got the layout reconstructed properly
+ object_size = int(self._mount.getfattr("./datafile", "ceph.file.layout.object_size", sudo=True))
+ self.assert_equal(object_size, 8388608)
+
+ # Check we got the file size reconstructed properly
+ st = self._mount.stat("datafile", sudo=True)
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+
+
+class TestDataScan(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ def _rebuild_metadata(self, workload, workers=1):
+ """
+        That when all objects in the metadata pool are removed, we can rebuild the
+        metadata pool from the contents of the data pool, and a client can see
+        and read our files.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+ # Unmount the client and flush the journal: the tool should also cope with
+ # situations where there is dirty metadata, but we'll test that separately
+ self.mount_a.umount_wait()
+ workload.flush()
+
+ # Stop the MDS
+ self.fs.fail()
+
+ # After recovery, we need the MDS to not be strict about stats (in production these options
+ # are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ # Reset the MDS map in case multiple ranks were in play: recovery procedure
+ # only understands how to rebuild metadata under rank 0
+ self.fs.reset()
+
+ self.fs.set_joinable() # redundant with reset
+
+ def get_state(mds_id):
+ info = self.mds_cluster.get_mds_info(mds_id)
+ return info['state'] if info is not None else None
+
+ self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
+ for mds_id in self.fs.mds_ids:
+ self.wait_until_equal(
+ lambda: get_state(mds_id),
+ "up:standby",
+ timeout=60)
+
+ self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
+
+ # Run the recovery procedure
+ if False:
+ with self.assertRaises(CommandFailedError):
+ # Normal reset should fail when no objects are present, we'll use --force instead
+ self.fs.journal_tool(["journal", "reset"], 0)
+
+ self.fs.journal_tool(["journal", "reset", "--force"], 0)
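+        # cephfs-data-scan phases (roughly): 'init' recreates the root/mds dir
+        # inodes, 'scan_extents' walks the data pool to recover file sizes and
+        # mtimes, 'scan_inodes' injects the recovered inodes back into the
+        # metadata pool, and 'scan_links' fixes up dentry linkage and link counts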
+ self.fs.data_scan(["init"])
+ self.fs.data_scan(["scan_extents"], worker_count=workers)
+ self.fs.data_scan(["scan_inodes"], worker_count=workers)
+ self.fs.data_scan(["scan_links"])
+
+ # Mark the MDS repaired
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Start the MDS
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+ log.info(str(self.mds_cluster.status()))
+
+ # Mount a client
+ self.mount_a.mount_wait()
+
+ # run scrub as it is recommended post recovery for most
+ # (if not all) recovery mechanisms.
+ workload.scrub()
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def test_rebuild_simple(self):
+ self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))
+
+ def test_rebuild_symlink(self):
+ self._rebuild_metadata(SymlinkWorkload(self.fs, self.mount_a))
+
+ def test_rebuild_moved_file(self):
+ self._rebuild_metadata(MovedFile(self.fs, self.mount_a))
+
+ def test_rebuild_backtraceless(self):
+ self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a))
+
+ def test_rebuild_moved_dir(self):
+ self._rebuild_metadata(MovedDir(self.fs, self.mount_a))
+
+ def test_rebuild_missing_zeroth(self):
+ self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a))
+
+ def test_rebuild_nondefault_layout(self):
+ self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a))
+
+ def test_stashed_layout(self):
+ self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a))
+
+ def _dirfrag_keys(self, object_id):
+ keys_str = self.fs.radosmo(["listomapkeys", object_id], stdout=StringIO())
+ if keys_str:
+ return keys_str.strip().split("\n")
+ else:
+ return []
+
+ def test_fragmented_injection(self):
+ """
+ That when injecting a dentry into a fragmented directory, we put it in the right fragment.
+ """
+
+ file_count = 100
+ file_names = ["%s" % n for n in range(0, file_count)]
+
+        # Make sure to disable dirfrag auto merging and splitting
+ self.fs.set_ceph_conf('mds', 'mds bal merge size', 0)
+ self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count)
+
+ # Create a directory of `file_count` files, each named after its
+ # decimal number and containing the string of its decimal number
+ self.mount_a.run_python(dedent("""
+ import os
+ path = os.path.join("{path}", "subdir")
+ os.mkdir(path)
+ for n in range(0, {file_count}):
+ open(os.path.join(path, "%s" % n), 'w').write("%s" % n)
+ """.format(
+ path=self.mount_a.mountpoint,
+ file_count=file_count
+ )))
+
+ dir_ino = self.mount_a.path_to_ino("subdir")
+
+ # Only one MDS should be active!
+ self.assertEqual(len(self.fs.get_active_names()), 1)
+
+ # Ensure that one directory is fragmented
+ mds_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id)
+
+ # Flush journal and stop MDS
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ self.fs.fail()
+
+ # Pick a dentry and wipe out its key
+ # Because I did a 1 bit split, I know one frag will be named <inode>.01000000
+ frag_obj_id = "{0:x}.01000000".format(dir_ino)
+ keys = self._dirfrag_keys(frag_obj_id)
+ victim_key = keys[7] # arbitrary choice
+ log.info("victim_key={0}".format(victim_key))
+ victim_dentry = victim_key.split("_head")[0]
+ self.fs.radosm(["rmomapkey", frag_obj_id, victim_key])
+
+ # Start filesystem back up, observe that the file appears to be gone in an `ls`
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+ files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n")
+ self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry]))))
+
+ # Stop the filesystem
+ self.mount_a.umount_wait()
+ self.fs.fail()
+
+ # Run data-scan, observe that it inserts our dentry back into the correct fragment
+ # by checking the omap now has the dentry's key again
+ self.fs.data_scan(["scan_extents"])
+ self.fs.data_scan(["scan_inodes"])
+ self.fs.data_scan(["scan_links"])
+ self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id))
+
+ # Start the filesystem and check that the dentry we deleted is now once again visible
+ # and points to the correct file data.
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["ls", "-l", "subdir/"]) # debugging
+ # Use sudo because cephfs-data-scan will reinsert the dentry with root ownership, it can't know the real owner.
+ out = self.mount_a.run_shell_payload(f"sudo cat subdir/{victim_dentry}", omit_sudo=False).stdout.getvalue().strip()
+ self.assertEqual(out, victim_dentry)
+
+ # Finally, close the loop by checking our injected dentry survives a merge
+ mds_id = self.fs.get_active_names()[0]
+ self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work
+ self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id)
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ frag_obj_id = "{0:x}.00000000".format(dir_ino)
+ keys = self._dirfrag_keys(frag_obj_id)
+ self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names]))
+
+ # run scrub to update and make sure rstat.rbytes info in subdir inode and dirfrag
+ # are matched
+ out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+        # Remove the whole 'subdir' directory
+ self.mount_a.run_shell(["rm", "-rf", "subdir/"])
+
+ @for_teuthology
+ def test_parallel_execution(self):
+ self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)
+
+ def test_pg_files(self):
+ """
+ That the pg files command tells us which files are associated with
+ a particular PG
+ """
+ file_count = 20
+ self.mount_a.run_shell(["mkdir", "mydir"])
+ self.mount_a.create_n_files("mydir/myfile", file_count)
+
+ # Some files elsewhere in the system that we will ignore
+ # to check that the tool is filtering properly
+ self.mount_a.run_shell(["mkdir", "otherdir"])
+ self.mount_a.create_n_files("otherdir/otherfile", file_count)
+
+ pgs_to_files = defaultdict(list)
+ # Rough (slow) reimplementation of the logic
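+        # each file's first data object is named <ino hex>.00000000; ask the
+        # OSD map which PG that object hashes to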
+ for i in range(0, file_count):
+ file_path = "mydir/myfile_{0}".format(i)
+ ino = self.mount_a.path_to_ino(file_path)
+ obj = "{0:x}.{1:08x}".format(ino, 0)
+ pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+ "osd", "map", self.fs.get_data_pool_name(), obj,
+ "--format=json-pretty"
+ ))['pgid']
+ pgs_to_files[pgid].append(file_path)
+ log.info("{0}: {1}".format(file_path, pgid))
+
+ pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name())
+ for pg_n in range(0, pg_count):
+ pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n)
+ out = self.fs.data_scan(["pg_files", "mydir", pg_str])
+ lines = [l for l in out.split("\n") if l]
+ log.info("{0}: {1}".format(pg_str, lines))
+ self.assertSetEqual(set(lines), set(pgs_to_files[pg_str]))
+
+ def test_rebuild_linkage(self):
+ """
+ The scan_links command fixes linkage errors
+ """
+ self.mount_a.run_shell(["mkdir", "testdir1"])
+ self.mount_a.run_shell(["mkdir", "testdir2"])
+ dir1_ino = self.mount_a.path_to_ino("testdir1")
+ dir2_ino = self.mount_a.path_to_ino("testdir2")
+ dirfrag1_oid = "{0:x}.00000000".format(dir1_ino)
+ dirfrag2_oid = "{0:x}.00000000".format(dir2_ino)
+
+ self.mount_a.run_shell(["touch", "testdir1/file1"])
+ self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"])
+ self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"])
+
+ mds_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+
+ dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid)
+
+ # introduce duplicated primary link
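+        # (scan_links should keep whichever primary dentry has the newer
+        # version - we bump the good one's version further below)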
+ file1_key = "file1_head"
+ self.assertIn(file1_key, dirfrag1_keys)
+ file1_omap_data = self.fs.radosmo(["getomapval", dirfrag1_oid, file1_key, '-'])
+ self.fs.radosm(["setomapval", dirfrag2_oid, file1_key], stdin=BytesIO(file1_omap_data))
+ self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
+
+ # remove a remote link, make inode link count incorrect
+ link1_key = 'link1_head'
+ self.assertIn(link1_key, dirfrag1_keys)
+ self.fs.radosm(["rmomapkey", dirfrag1_oid, link1_key])
+
+ # increase good primary link's version
+ self.mount_a.run_shell(["touch", "testdir1/file1"])
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(["flush", "journal"], mds_id)
+ self.fs.fail()
+
+ # repair linkage errors
+ self.fs.data_scan(["scan_links"])
+
+ # primary link in testdir2 was deleted?
+ self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid))
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+
+ # link count was adjusted?
+ file1_nlink = self.mount_a.path_to_nlink("testdir1/file1")
+ self.assertEqual(file1_nlink, 2)
+
+ out_json = self.fs.run_scrub(["start", "/testdir1", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ def test_rebuild_inotable(self):
+ """
+        The scan_links command repairs the inotables
+ """
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+
+ active_mds_names = self.fs.get_active_names()
+ mds0_id = active_mds_names[0]
+ mds1_id = active_mds_names[1]
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ dir_ino = self.mount_a.path_to_ino("dir1")
+ self.mount_a.setfattr("dir1", "ceph.dir.pin", "1")
+ # wait for subtree migration
+
+        file_ino = 0
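+        # the test assumes inode numbers >= 2 << 40 come from rank 1's
+        # preallocated range, so keep retrying until the new file's inode
+        # number shows it was allocated by mds.1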
+ while True:
+ time.sleep(1)
+ # allocate an inode from mds.1
+ self.mount_a.run_shell(["touch", "dir1/file1"])
+ file_ino = self.mount_a.path_to_ino("dir1/file1")
+ if file_ino >= (2 << 40):
+ break
+ self.mount_a.run_shell(["rm", "-f", "dir1/file1"])
+
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(["flush", "journal"], mds0_id)
+ self.fs.mds_asok(["flush", "journal"], mds1_id)
+ self.fs.fail()
+
+ self.fs.radosm(["rm", "mds0_inotable"])
+ self.fs.radosm(["rm", "mds1_inotable"])
+
+ self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+ mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino)
+
+ mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"]))
+ self.assertGreaterEqual(
+ mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino)
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ def test_rebuild_snaptable(self):
+ """
+        The scan_links command repairs the snaptable
+ """
+ self.fs.set_allow_new_snaps(True)
+
+ self.mount_a.run_shell(["mkdir", "dir1"])
+ self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"])
+ self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"])
+ self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"])
+
+ self.mount_a.umount_wait()
+
+ mds0_id = self.fs.get_active_names()[0]
+ self.fs.mds_asok(["flush", "journal"], mds0_id)
+
+ # wait for mds to update removed snaps
+ time.sleep(10)
+
+ old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+ # stamps may have minor difference
+ for item in old_snaptable['snapserver']['snaps']:
+ del item['stamp']
+
+ self.fs.radosm(["rm", "mds_snaptable"])
+ self.fs.data_scan(["scan_links", "--filesystem", self.fs.name])
+
+ new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"]))
+ for item in new_snaptable['snapserver']['snaps']:
+ del item['stamp']
+ self.assertGreaterEqual(
+ new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap'])
+ self.assertEqual(
+ new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps'])
+
+ out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ def _prepare_extra_data_pool(self, set_root_layout=True):
+ extra_data_pool_name = self.fs.get_data_pool_name() + '_extra'
+ self.fs.add_data_pool(extra_data_pool_name)
+ if set_root_layout:
+ self.mount_a.setfattr(".", "ceph.dir.layout.pool",
+ extra_data_pool_name)
+ return extra_data_pool_name
+
+ def test_extra_data_pool_rebuild_simple(self):
+ self._prepare_extra_data_pool()
+ self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a))
+
+ def test_extra_data_pool_rebuild_few_files(self):
+ self._prepare_extra_data_pool()
+ self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 5), workers=1)
+
+ @for_teuthology
+ def test_extra_data_pool_rebuild_many_files_many_workers(self):
+ self._prepare_extra_data_pool()
+ self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7)
+
+ def test_extra_data_pool_stashed_layout(self):
+ pool_name = self._prepare_extra_data_pool(False)
+ self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a, pool_name))
diff --git a/qa/tasks/cephfs/test_dump_tree.py b/qa/tasks/cephfs/test_dump_tree.py
new file mode 100644
index 000000000..48a2c6f00
--- /dev/null
+++ b/qa/tasks/cephfs/test_dump_tree.py
@@ -0,0 +1,66 @@
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import random
+import os
+
+class TestDumpTree(CephFSTestCase):
+ def get_paths_to_ino(self):
+ inos = {}
+ p = self.mount_a.run_shell(["find", "./"])
+ paths = p.stdout.getvalue().strip().split()
+ for path in paths:
+ inos[path] = self.mount_a.path_to_ino(path, False)
+
+ return inos
+
+ def populate(self):
+ self.mount_a.run_shell(["git", "clone",
+ "https://github.com/ceph/ceph-qa-suite"])
+
+ def test_basic(self):
+ self.mount_a.run_shell(["mkdir", "parent"])
+ self.mount_a.run_shell(["mkdir", "parent/child"])
+ self.mount_a.run_shell(["touch", "parent/child/file"])
+ self.mount_a.run_shell(["mkdir", "parent/child/grandchild"])
+ self.mount_a.run_shell(["touch", "parent/child/grandchild/file"])
+
+ inos = self.get_paths_to_ino()
+ tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"])
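+ # Note: "dump tree <path> <depth>" returns a JSON list of inode records for
+ # the subtree; this test only relies on each entry carrying an 'ino' field
+ # (the other fields vary by release and are not checked here).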
+
+ target_inos = [inos["./parent/child"], inos["./parent/child/file"],
+ inos["./parent/child/grandchild"]]
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # raises ValueError on an unexpected ino; don't catch!
+
+ assert(len(target_inos) == 0)
+
+ def test_random(self):
+ random.seed(0)
+
+ self.populate()
+ inos = self.get_paths_to_ino()
+ target = random.sample(list(inos.keys()), 1)[0] # list() needed: newer Python rejects dict views here
+
+ if target != "./":
+ target = os.path.dirname(target)
+
+ subtree = [path for path in inos.keys() if path.startswith(target)]
+ target_inos = [inos[path] for path in subtree]
+ tree = self.fs.mds_asok(["dump", "tree", target[1:]])
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # raises ValueError on an unexpected ino; don't catch!
+
+ assert(len(target_inos) == 0)
+
+ target_depth = target.count('/')
+ maxdepth = max([path.count('/') for path in subtree]) - target_depth
+ depth = random.randint(0, maxdepth)
+ target_inos = [inos[path] for path in subtree \
+ if path.count('/') <= depth + target_depth]
+ tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)])
+
+ for ino in tree:
+ del target_inos[target_inos.index(ino['ino'])] # raises ValueError on an unexpected ino; don't catch!
+
+ assert(len(target_inos) == 0)
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py
new file mode 100644
index 000000000..4b7e884ec
--- /dev/null
+++ b/qa/tasks/cephfs/test_exports.py
@@ -0,0 +1,582 @@
+import logging
+import random
+import time
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+class TestExports(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 2
+
+ def test_session_race(self):
+ """
+ Test session creation race.
+
+ See: https://tracker.ceph.com/issues/24072#change-113056
+ """
+
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ rank1 = self.fs.get_rank(rank=1, status=status)
+
+ # Create a directory that is pre-exported to rank 1
+ self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
+ self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/a', 1)], status=status, rank=1)
+
+ # Now set the mds config to allow the race
+ self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)
+
+ # Now create another directory and try to export it
+ self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
+ self.mount_b.setfattr("b", "ceph.dir.pin", "1")
+
+ time.sleep(5)
+
+ # Now turn off the race so that it doesn't wait again
+ self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)
+
+ # Now try to create a session with rank 1 by accessing a dir known to
+ # be there; if buggy, this should cause rank 1 to crash:
+ self.mount_b.run_shell(["ls", "a"])
+
+ # Check if rank 1 changed (did a standby take over?)
+ new_rank1 = self.fs.get_rank(rank=1)
+ self.assertEqual(rank1['gid'], new_rank1['gid'])
+
+class TestExportPin(CephFSTestCase):
+ MDSS_REQUIRED = 3
+ CLIENTS_REQUIRED = 1
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ self.fs.set_max_mds(3)
+ self.status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload("mkdir -p 1/2/3/4")
+
+ def test_noop(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "-1")
+ time.sleep(30) # for something to not happen
+ self._wait_subtrees([], status=self.status)
+
+ def test_negative(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "-2341")
+ time.sleep(30) # for something to not happen
+ self._wait_subtrees([], status=self.status)
+
+ def test_empty_pin(self):
+ self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1")
+ time.sleep(30) # for something to not happen
+ self._wait_subtrees([], status=self.status)
+
+ def test_trivial(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
+
+ def test_export_targets(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
+ self.status = self.fs.status()
+ r0 = self.status.get_rank(self.fs.id, 0)
+ self.assertEqual(sorted(r0['export_targets']), [1])
+
+ def test_redundant(self):
+ # redundant pin /1/2 to rank 1
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1)
+
+ def test_reassignment(self):
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1)
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
+ self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0)
+
+ def test_phantom_rank(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "10")
+ time.sleep(30) # wait for nothing weird to happen
+ self._wait_subtrees([('/1', 0)], status=self.status)
+
+ def test_nested(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2")
+ self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2)
+
+ def test_nested_unset(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "2")
+ self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1)
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1")
+ self._wait_subtrees([('/1', 1)], status=self.status, rank=1)
+
+ def test_rename(self):
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self.mount_a.run_shell_payload("mkdir -p 9/8/7")
+ self.mount_a.setfattr("9/8", "ceph.dir.pin", "0")
+ self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0)
+ self.mount_a.run_shell_payload("mv 9/8 1/2")
+ self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0)
+
+ def test_getfattr(self):
+ # pin /1 to rank 1 and /1/2 to rank 0
+ self.mount_a.setfattr("1", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "0")
+ self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1)
+
+ if not isinstance(self.mount_a, FuseMount):
+ p = self.mount_a.client_remote.sh('uname -r', wait=True)
+ dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin")
+ log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin)
+ if str(p) < "5" and not(dir_pin):
+ self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin")
+ self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1')
+ self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0')
+
+ def test_export_pin_cache_drop(self):
+ """
+ That the export pin does not prevent empty (nothing in cache) subtree merging.
+ """
+
+ self.mount_a.setfattr("1", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("1/2", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status)
+ self.mount_a.umount_wait() # release all caps
+ def _drop():
+ self.fs.ranks_tell(["cache", "drop"], status=self.status)
+ # drop cache multiple times to clear replica pins
+ self._wait_subtrees([], status=self.status, action=_drop)
+
+ def test_open_file(self):
+ """
+ Test opening a file via a hard link whose dentry lives on a different MDS rank than the inode.
+
+ See https://tracker.ceph.com/issues/58411
+ """
+
+ self.mount_a.run_shell_payload("mkdir -p target link")
+ self.mount_a.touch("target/test.txt")
+ self.mount_a.run_shell_payload("ln target/test.txt link/test.txt")
+ self.mount_a.setfattr("target", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("link", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/target", 0), ("/link", 1)], status=self.status)
+
+ # Release client cache, otherwise the bug may not be triggered even if buggy.
+ self.mount_a.remount()
+
+ # Open the file with access mode (O_CREAT|O_WRONLY|O_TRUNC);
+ # if buggy, this should cause rank 1 to crash.
+ # 'truncate -s 0 link/test.txt' would also work here: its access mode is
+ # (O_CREAT|O_WRONLY), which can trigger the same bug.
+ log.info("test open mode (O_CREAT|O_WRONLY|O_TRUNC)")
+ proc = self.mount_a.open_for_writing("link/test.txt")
+ time.sleep(1)
+ success = proc.finished and self.fs.rank_is_running(rank=1)
+
+ # Test other write modes too.
+ if success:
+ self.mount_a.remount()
+ log.info("test open mode (O_WRONLY|O_TRUNC)")
+ proc = self.mount_a.open_for_writing("link/test.txt", creat=False)
+ time.sleep(1)
+ success = proc.finished and self.fs.rank_is_running(rank=1)
+ if success:
+ self.mount_a.remount()
+ log.info("test open mode (O_CREAT|O_WRONLY)")
+ proc = self.mount_a.open_for_writing("link/test.txt", trunc=False)
+ time.sleep(1)
+ success = proc.finished and self.fs.rank_is_running(rank=1)
+
+ # Test open modes too.
+ if success:
+ self.mount_a.remount()
+ log.info("test open mode (O_RDONLY)")
+ proc = self.mount_a.open_for_reading("link/test.txt")
+ time.sleep(1)
+ success = proc.finished and self.fs.rank_is_running(rank=1)
+
+ if success:
+ # All tests done, rank 1 didn't crash.
+ return
+
+ if not proc.finished:
+ log.warning("open operation is blocked, kill it")
+ proc.kill()
+
+ if not self.fs.rank_is_running(rank=1):
+ log.warning("rank 1 crashed")
+
+ self.mount_a.umount_wait(force=True)
+
+ self.assertTrue(success, "open operation failed")
+
+class TestEphemeralPins(CephFSTestCase):
+ MDSS_REQUIRED = 3
+ CLIENTS_REQUIRED = 1
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ self.config_set('mds', 'mds_export_ephemeral_random', True)
+ self.config_set('mds', 'mds_export_ephemeral_distributed', True)
+ self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0)
+
+ self.mount_a.run_shell_payload("""
+set -e
+
+# Use up a random number of inode numbers so the ephemeral pinning is not the same every test.
+mkdir .inode_number_thrash
+count=$((RANDOM % 1024))
+for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done
+rm -rf .inode_number_thrash
+""")
+
+ self.fs.set_max_mds(3)
+ self.status = self.fs.wait_for_daemons()
+
+ def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True):
+ return self.mount_a.run_shell_payload(f"""
+set -ex
+mkdir -p {path}
+{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""}
+{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""}
+{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""}
+for ((i = 0; i < {count}; i++)); do
+ mkdir -p "{path}/$i"
+ echo file > "{path}/$i/file"
+done
+""", wait=wait)
+
+ def test_ephemeral_pin_dist_override(self):
+ """
+ That an ephemeral distributed pin overrides a normal export pin.
+ """
+
+ self._setup_tree(distributed=True)
+ subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ for s in subtrees:
+ path = s['dir']['path']
+ if path == '/tree':
+ self.assertTrue(s['distributed_ephemeral_pin'])
+
+ def test_ephemeral_pin_dist_override_pin(self):
+ """
+ That an export pin overrides an ephemerally pinned directory.
+ """
+
+ self._setup_tree(distributed=True)
+ subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ self.mount_a.setfattr("tree", "ceph.dir.pin", "0")
+ time.sleep(15)
+ subtrees = self._get_subtrees(status=self.status, rank=0)
+ for s in subtrees:
+ path = s['dir']['path']
+ if path == '/tree':
+ self.assertEqual(s['auth_first'], 0)
+ self.assertFalse(s['distributed_ephemeral_pin'])
+ # it has been merged into /tree
+
+ def test_ephemeral_pin_dist_off(self):
+ """
+ That turning off ephemeral distributed pin merges subtrees.
+ """
+
+ self._setup_tree(distributed=True)
+ self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0")
+ time.sleep(15)
+ subtrees = self._get_subtrees(status=self.status, rank=0)
+ for s in subtrees:
+ path = s['dir']['path']
+ if path == '/tree':
+ self.assertFalse(s['distributed_ephemeral_pin'])
+
+
+ def test_ephemeral_pin_dist_conf_off(self):
+ """
+ That turning off ephemeral distributed pin config prevents distribution.
+ """
+
+ self._setup_tree()
+ self.config_set('mds', 'mds_export_ephemeral_distributed', False)
+ self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
+ time.sleep(15)
+ subtrees = self._get_subtrees(status=self.status, rank=0)
+ for s in subtrees:
+ path = s['dir']['path']
+ if path == '/tree':
+ self.assertFalse(s['distributed_ephemeral_pin'])
+
+ def _test_ephemeral_pin_dist_conf_off_merge(self):
+ """
+ That turning off ephemeral distributed pin config merges subtrees.
+ FIXME: who triggers the merge?
+ """
+
+ self._setup_tree(distributed=True)
+ self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ self.config_set('mds', 'mds_export_ephemeral_distributed', False)
+ self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status)
+
+ def test_ephemeral_pin_dist_override_before(self):
+ """
+ That a conventional export pin overrides the distributed policy _before_ distributed policy is set.
+ """
+
+ count = 10
+ self._setup_tree(count=count)
+ test = []
+ for i in range(count):
+ path = f"tree/{i}"
+ self.mount_a.setfattr(path, "ceph.dir.pin", "1")
+ test.append(("/"+path, 1))
+ self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1")
+ time.sleep(15) # for something to not happen...
+ self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")
+
+ def test_ephemeral_pin_dist_override_after(self):
+ """
+ That a conventional export pin overrides the distributed policy _after_ distributed policy is set.
+ """
+
+ self._setup_tree(distributed=True)
+ self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ test = []
+ for i in range(10):
+ path = f"tree/{i}"
+ self.mount_a.setfattr(path, "ceph.dir.pin", "1")
+ test.append(("/"+path, 1))
+ self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/")
+
+ def test_ephemeral_pin_dist_failover(self):
+ """
+ That MDS failover does not cause unnecessary migrations.
+ """
+
+ # pin /tree so it does not export during failover
+ self._setup_tree(distributed=True)
+ self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all")
+ #test = [(s['dir']['path'], s['auth_first']) for s in subtrees]
+ before = self.fs.ranks_perf(lambda p: p['mds']['exported'])
+ log.info(f"export stats: {before}")
+ self.fs.rank_fail(rank=1)
+ self.status = self.fs.wait_for_daemons()
+ time.sleep(10) # waiting for something to not happen
+ after = self.fs.ranks_perf(lambda p: p['mds']['exported'])
+ log.info(f"export stats: {after}")
+ self.assertEqual(before, after)
+
+ def test_ephemeral_pin_distribution(self):
+ """
+ That ephemerally pinned subtrees are somewhat evenly distributed.
+ """
+
+ max_mds = 3
+ frags = 128
+
+ self.fs.set_max_mds(max_mds)
+ self.status = self.fs.wait_for_daemons()
+
+ self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds)
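+ # Informal reading (not asserted elsewhere): with a distributed factor f and
+ # max_mds m, the policy fragments the tree into roughly the next power of two
+ # above f * m dirfrags, so (frags-1)/max_mds is chosen to make the expected
+ # subtree count exactly `frags` here.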
+ self._setup_tree(count=1000, distributed=True)
+
+ subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all")
+ nsubtrees = len(subtrees)
+
+ # Check if distribution is uniform
+ rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees))
+ rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees))
+ rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees))
+ self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15)
+ self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15)
+ self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15)
+
+
+ def test_ephemeral_random(self):
+ """
+ That 100% randomness causes all children to be pinned.
+ """
+ self._setup_tree(random=1.0)
+ self._wait_random_subtrees(100, status=self.status, rank="all")
+
+ def test_ephemeral_random_max(self):
+ """
+ That the config mds_export_ephemeral_random_max is not exceeded.
+ """
+
+ r = 0.5
+ count = 1000
+ self._setup_tree(count=count, random=r)
+ subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all")
+ self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
+ self._setup_tree(path="tree/new", count=count)
+ time.sleep(30) # for something not to happen...
+ subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/")
+ self.assertLessEqual(len(subtrees), int(.01*count*1.25))
+
+ def test_ephemeral_random_max_config(self):
+ """
+ That the mds_export_ephemeral_random_max config rejects new out-of-bounds policies.
+ """
+
+ self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01)
+ try:
+ p = self._setup_tree(count=1, random=0.02, wait=False)
+ p.wait()
+ except CommandFailedError as e:
+ log.info(f"{e}")
+ self.assertIn("Invalid", p.stderr.getvalue())
+ else:
+ raise RuntimeError("mds_export_ephemeral_random_max ignored!")
+
+ def test_ephemeral_random_dist(self):
+ """
+ That the ephemeral distributed pin overrides the ephemeral random pin.
+ """
+
+ self._setup_tree(random=1.0, distributed=True)
+ self._wait_distributed_subtrees(3 * 2, status=self.status)
+
+ time.sleep(15)
+ subtrees = self._get_subtrees(status=self.status, rank=0)
+ for s in subtrees:
+ path = s['dir']['path']
+ if path.startswith('/tree'):
+ self.assertFalse(s['random_ephemeral_pin'])
+
+ def test_ephemeral_random_pin_override_before(self):
+ """
+ That a conventional export pin overrides the random policy before creating new directories.
+ """
+
+ self._setup_tree(count=0, random=1.0)
+ self._setup_tree(path="tree/pin", count=10, export=1)
+ self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")
+
+ def test_ephemeral_random_pin_override_after(self):
+ """
+ That a conventional export pin overrides the random policy after creating new directories.
+ """
+
+ count = 10
+ self._setup_tree(count=0, random=1.0)
+ self._setup_tree(path="tree/pin", count=count)
+ self._wait_random_subtrees(count+1, status=self.status, rank="all")
+ self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin")
+
+ def test_ephemeral_randomness(self):
+ """
+ That the randomness is reasonable.
+ """
+
+ r = random.uniform(0.25, 0.75) # ratios don't work for small r!
+ count = 1000
+ self._setup_tree(count=count, random=r)
+ subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
+ time.sleep(30) # for max to not be exceeded
+ subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all")
+ self.assertLessEqual(len(subtrees), int(r*count*1.50))
+
+ def test_ephemeral_random_cache_drop(self):
+ """
+ That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging.
+ """
+
+ count = 100
+ self._setup_tree(count=count, random=1.0)
+ self._wait_random_subtrees(count, status=self.status, rank="all")
+ self.mount_a.umount_wait() # release all caps
+ def _drop():
+ self.fs.ranks_tell(["cache", "drop"], status=self.status)
+ self._wait_subtrees([], status=self.status, action=_drop)
+
+ def test_ephemeral_random_failover(self):
+ """
+ That the random ephemeral pins stay pinned across MDS failover.
+ """
+
+ count = 100
+ r = 0.5
+ self._setup_tree(count=count, random=r)
+ # wait for all random subtrees to be created, not a specific count
+ time.sleep(30)
+ subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
+ before = [(s['dir']['path'], s['auth_first']) for s in subtrees]
+ before.sort()
+
+ self.fs.rank_fail(rank=1)
+ self.status = self.fs.wait_for_daemons()
+
+ time.sleep(30) # waiting for something to not happen
+ subtrees = self._wait_random_subtrees(1, status=self.status, rank=1)
+ after = [(s['dir']['path'], s['auth_first']) for s in subtrees]
+ after.sort()
+ log.info(f"subtrees before: {before}")
+ log.info(f"subtrees after: {after}")
+
+ self.assertEqual(before, after)
+
+ def test_ephemeral_pin_grow_mds(self):
+ """
+ That consistent hashing works to reduce the number of migrations.
+ """
+
+ self.fs.set_max_mds(2)
+ self.status = self.fs.wait_for_daemons()
+
+ self._setup_tree(random=1.0)
+ subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")
+
+ self.fs.set_max_mds(3)
+ self.status = self.fs.wait_for_daemons()
+
+ # Sleeping for a while to allow the ephemeral pin migrations to complete
+ time.sleep(30)
+
+ subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
+ count = 0
+ for old_subtree in subtrees_old:
+ for new_subtree in subtrees_new:
+ if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']):
+ count = count + 1
+ break
+
+ log.info("{0} migrations have occured due to the cluster resizing".format(count))
+ # ~50% of subtrees from the two rank will migrate to another rank
+ self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget
+
+ def test_ephemeral_pin_shrink_mds(self):
+ """
+ That consistent hashing works to reduce the number of migrations.
+ """
+
+ self.fs.set_max_mds(3)
+ self.status = self.fs.wait_for_daemons()
+
+ self._setup_tree(random=1.0)
+ subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all")
+
+ self.fs.set_max_mds(2)
+ self.status = self.fs.wait_for_daemons()
+ time.sleep(30)
+
+ subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all")
+ count = 0
+ for old_subtree in subtrees_old:
+ for new_subtree in subtrees_new:
+ if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']):
+ count = count + 1
+ break
+
+ log.info("{0} migrations have occured due to the cluster resizing".format(count))
+ # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2
+ self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
new file mode 100644
index 000000000..ddcc58ccc
--- /dev/null
+++ b/qa/tasks/cephfs/test_failover.py
@@ -0,0 +1,819 @@
+import time
+import signal
+import logging
+import operator
+from random import randint, choice
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.fuse_mount import FuseMount
+
+log = logging.getLogger(__name__)
+
+class TestClusterAffinity(CephFSTestCase):
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 4
+
+ def _verify_join_fs(self, target, status=None, fs=None):
+ fs_select = fs
+ if fs_select is None:
+ fs_select = self.fs
+ if status is None:
+ status = fs_select.wait_for_daemons(timeout=30)
+ log.debug("%s", status)
+ target = sorted(target, key=operator.itemgetter('name'))
+ log.info("target = %s", target)
+ current = list(status.get_all())
+ current = sorted(current, key=operator.itemgetter('name'))
+ log.info("current = %s", current)
+ self.assertEqual(len(current), len(target))
+ for i in range(len(current)):
+ for attr in target[i]:
+ self.assertIn(attr, current[i])
+ self.assertEqual(target[i][attr], current[i][attr])
+
+ def _change_target_state(self, state, name, changes):
+ for entity in state:
+ if entity['name'] == name:
+ for k, v in changes.items():
+ entity[k] = v
+ return
+ self.fail("no entity")
+
+ def _verify_init(self, fs=None):
+ fs_select = fs
+ if fs_select is None:
+ fs_select = self.fs
+ status = fs_select.status()
+ log.info("status = {0}".format(status))
+ target = [{'join_fscid': -1, 'name': info['name']} for info in status.get_all()]
+ self._verify_join_fs(target, status=status, fs=fs_select)
+ return (status, target)
+
+ def _reach_target(self, target):
+ def takeover():
+ try:
+ self._verify_join_fs(target)
+ return True
+ except AssertionError as e:
+ log.debug("%s", e)
+ return False
+ self.wait_until_true(takeover, 30)
+
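+ # For context (illustrative only): the config_set calls in the tests below
+ # correspond to the CLI form
+ #
+ #   ceph config set mds.<name> mds_join_fs <fs name>
+ #
+ # which records a daemon's file system affinity; the monitors then prefer
+ # standbys whose mds_join_fs matches a file system when filling its ranks.
+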
+ def test_join_fs_runtime(self):
+ """
+ That setting mds_join_fs at runtime affects the cluster layout.
+ """
+ status, target = self._verify_init()
+ standbys = list(status.get_standbys())
+ self.config_set('mds.'+standbys[0]['name'], 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, standbys[0]['name'], {'join_fscid': self.fs.id, 'state': 'up:active'})
+ self._reach_target(target)
+
+ def test_join_fs_unset(self):
+ """
+ That unsetting mds_join_fs will cause failover if another high-affinity standby exists.
+ """
+ status, target = self._verify_init()
+ standbys = list(status.get_standbys())
+ names = (standbys[0]['name'], standbys[1]['name'])
+ self.config_set('mds.'+names[0], 'mds_join_fs', 'cephfs')
+ self.config_set('mds.'+names[1], 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, names[0], {'join_fscid': self.fs.id})
+ self._change_target_state(target, names[1], {'join_fscid': self.fs.id})
+ self._reach_target(target)
+ time.sleep(5) # MDSMonitor tick
+ status = self.fs.wait_for_daemons()
+ active = self.fs.get_active_names(status=status)[0]
+ self.assertIn(active, names)
+ self.config_rm('mds.'+active, 'mds_join_fs')
+ self._change_target_state(target, active, {'join_fscid': -1})
+ new_active = (set(names) - set((active,))).pop()
+ self._change_target_state(target, new_active, {'state': 'up:active'})
+ self._reach_target(target)
+
+ def test_join_fs_drop(self):
+ """
+ That unsetting mds_join_fs will not cause failover if no high-affinity standby exists.
+ """
+ status, target = self._verify_init()
+ standbys = list(status.get_standbys())
+ active = standbys[0]['name']
+ self.config_set('mds.'+active, 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, active, {'join_fscid': self.fs.id, 'state': 'up:active'})
+ self._reach_target(target)
+ self.config_rm('mds.'+active, 'mds_join_fs')
+ self._change_target_state(target, active, {'join_fscid': -1})
+ self._reach_target(target)
+
+ def test_join_fs_vanilla(self):
+ """
+ That a vanilla standby is preferred over others with mds_join_fs set to another fs.
+ """
+ fs2 = self.mds_cluster.newfs(name="cephfs2")
+ status, target = self._verify_init()
+ active = self.fs.get_active_names(status=status)[0]
+ status2, _ = self._verify_init(fs=fs2)
+ active2 = fs2.get_active_names(status=status2)[0]
+ standbys = [info['name'] for info in status.get_standbys()]
+ victim = standbys.pop()
+ # Set a bogus fs on the others
+ for mds in standbys:
+ self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2')
+ self._change_target_state(target, mds, {'join_fscid': fs2.id})
+ # The active MDS for cephfs2 will be replaced by the MDS for which
+ # file system affinity has been set. Also, set the affinity for
+ # the earlier active MDS so that it is not chosen by the monitors
+ # as an active MDS for the existing file system.
+ log.info(f'assigning affinity to cephfs2 for active mds (mds.{active2})')
+ self.config_set(f'mds.{active2}', 'mds_join_fs', 'cephfs2')
+ self._change_target_state(target, active2, {'join_fscid': fs2.id})
+ self.fs.rank_fail()
+ self._change_target_state(target, victim, {'state': 'up:active'})
+ self._reach_target(target)
+ status = self.fs.status()
+ active = self.fs.get_active_names(status=status)[0]
+ self.assertEqual(active, victim)
+
+ def test_join_fs_last_resort(self):
+ """
+ That a standby with mds_join_fs set to another fs is still used if necessary.
+ """
+ status, target = self._verify_init()
+ standbys = [info['name'] for info in status.get_standbys()]
+ for mds in standbys:
+ self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2')
+ fs2 = self.mds_cluster.newfs(name="cephfs2")
+ for mds in standbys:
+ self._change_target_state(target, mds, {'join_fscid': fs2.id})
+ self.fs.rank_fail()
+ status = self.fs.status()
+ ranks = list(self.fs.get_ranks(status=status))
+ self.assertEqual(len(ranks), 1)
+ self.assertIn(ranks[0]['name'], standbys)
+ # Note that we would expect the former active to reclaim its spot, but
+ # we're not testing that here.
+
+ def test_join_fs_steady(self):
+ """
+ That a sole MDS with mds_join_fs set will come back as active eventually even after failover.
+ """
+ status, target = self._verify_init()
+ active = self.fs.get_active_names(status=status)[0]
+ self.config_set('mds.'+active, 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, active, {'join_fscid': self.fs.id})
+ self._reach_target(target)
+ self.fs.rank_fail()
+ self._reach_target(target)
+
+ def test_join_fs_standby_replay(self):
+ """
+ That a standby-replay daemon with weak affinity is replaced by a stronger one.
+ """
+ status, target = self._verify_init()
+ standbys = [info['name'] for info in status.get_standbys()]
+ self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:active'})
+ self._reach_target(target)
+ self.fs.set_allow_standby_replay(True)
+ status = self.fs.status()
+ standbys = [info['name'] for info in status.get_standbys()]
+ self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs')
+ self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:standby-replay'})
+ self._reach_target(target)
+
+class TestClusterResize(CephFSTestCase):
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 3
+
+ def test_grow(self):
+ """
+ That the MDS cluster grows after increasing max_mds.
+ """
+
+ # Need all my standbys up as well as the active daemons
+ # self.wait_for_daemon_start() necessary?
+
+ self.fs.grow(2)
+ self.fs.grow(3)
+
+
+ def test_shrink(self):
+ """
+ That the MDS cluster shrinks automatically after decreasing max_mds.
+ """
+
+ self.fs.grow(3)
+ self.fs.shrink(1)
+
+ def test_up_less_than_max(self):
+ """
+ That a health warning is generated when max_mds is greater than active count.
+ """
+
+ status = self.fs.status()
+ mdss = [info['gid'] for info in status.get_all()]
+ self.fs.set_max_mds(len(mdss)+1)
+ self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30)
+ self.fs.shrink(2)
+ self.wait_for_health_clear(30)
+
+ def test_down_health(self):
+ """
+ That marking a FS down does not generate a health warning.
+ """
+
+ self.fs.set_down()
+ try:
+ self.wait_for_health("", 30)
+ raise RuntimeError("got health warning?")
+ except RuntimeError as e:
+ if "Timed out after" in str(e):
+ pass
+ else:
+ raise
+
+ def test_down_twice(self):
+ """
+ That marking a FS down twice does not wipe old_max_mds.
+ """
+
+ self.fs.grow(2)
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.fs.set_down(False)
+ self.assertEqual(self.fs.get_var("max_mds"), 2)
+ self.fs.wait_for_daemons(timeout=60)
+
+ def test_down_grow(self):
+ """
+ That setting max_mds undoes down.
+ """
+
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.fs.grow(2)
+ self.fs.wait_for_daemons()
+
+ def test_down(self):
+ """
+ That down setting toggles and sets max_mds appropriately.
+ """
+
+ self.fs.set_down()
+ self.fs.wait_for_daemons()
+ self.assertEqual(self.fs.get_var("max_mds"), 0)
+ self.fs.set_down(False)
+ self.assertEqual(self.fs.get_var("max_mds"), 1)
+ self.fs.wait_for_daemons()
+ self.assertEqual(self.fs.get_var("max_mds"), 1)
+
+ def test_hole(self):
+ """
+ Test that a hole cannot be created in the FS ranks.
+ """
+
+ fscid = self.fs.id
+
+ self.fs.grow(2)
+
+ # Now add a delay which should slow down how quickly rank 1 stops
+ self.config_set('mds', 'ms_inject_delay_max', '5.0')
+ self.config_set('mds', 'ms_inject_delay_probability', '1.0')
+ self.fs.set_max_mds(1)
+ log.info("status = {0}".format(self.fs.status()))
+
+ # Don't wait for rank 1 to stop
+ self.fs.set_max_mds(3)
+ log.info("status = {0}".format(self.fs.status()))
+
+ # Now check that the mons didn't try to promote a standby to rank 2
+ self.fs.set_max_mds(2)
+ status = self.fs.status()
+ try:
+ status = self.fs.wait_for_daemons(timeout=90)
+ ranks = set([info['rank'] for info in status.get_ranks(fscid)])
+ self.assertEqual(ranks, set([0, 1]))
+ finally:
+ log.info("status = {0}".format(status))
+
+ def test_thrash(self):
+ """
+ Test that thrashing max_mds does not fail.
+ """
+
+ max_mds = 2
+ for i in range(0, 100):
+ self.fs.set_max_mds(max_mds)
+ max_mds = (max_mds+1)%3+1
+
+ self.fs.wait_for_daemons(timeout=90)
+
+class TestFailover(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 2
+
+ def test_repeated_boot(self):
+ """
+ That multiple boot messages do not result in the MDS getting evicted.
+ """
+
+ interval = 10
+ self.config_set("mon", "paxos_propose_interval", interval)
+
+ mds = choice(list(self.fs.status().get_all()))
+
+ with self.assert_cluster_log(f"daemon mds.{mds['name']} restarted", present=False):
+ # Avoid a beacon to the monitors with down:dne by restarting:
+ self.fs.mds_fail(mds_id=mds['name'])
+ # `ceph mds fail` won't return until the FSMap is committed, double-check:
+ self.assertIsNone(self.fs.status().get_mds_gid(mds['gid']))
+ time.sleep(2) # for mds to restart and accept asok commands
+ status1 = self.fs.mds_asok(['status'], mds_id=mds['name'])
+ time.sleep(interval*1.5)
+ status2 = self.fs.mds_asok(['status'], mds_id=mds['name'])
+ self.assertEqual(status1['id'], status2['id'])
+
+ def test_simple(self):
+ """
+ That when the active MDS is killed, a standby MDS is promoted into
+ its rank after the grace period.
+
+ This is just a simple unit test, the harder cases are covered
+ in thrashing tests.
+ """
+
+ (original_active, ) = self.fs.get_active_names()
+ original_standbys = self.mds_cluster.get_standby_daemons()
+
+ # Kill the rank 0 daemon's physical process
+ self.fs.mds_stop(original_active)
+
+ # Wait until the monitor promotes its replacement
+ def promoted():
+ ranks = list(self.fs.get_ranks())
+ return len(ranks) > 0 and ranks[0]['name'] in original_standbys
+
+ log.info("Waiting for promotion of one of the original standbys {0}".format(
+ original_standbys))
+ self.wait_until_true(promoted, timeout=self.fs.beacon_timeout)
+
+ # Start the original rank 0 daemon up again, see that it becomes a standby
+ self.fs.mds_restart(original_active)
+ self.wait_until_true(
+ lambda: original_active in self.mds_cluster.get_standby_daemons(),
+ timeout=60 # Approximately long enough for MDS to start and mon to notice
+ )
+
+ def test_client_abort(self):
+ """
+ That a client will respect fuse_require_active_mds and error out
+ when the cluster appears to be unavailable.
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Requires FUSE client to inject client metadata")
+
+ require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true"
+ if not require_active:
+ self.skipTest("fuse_require_active_mds is not set")
+
+ # Check it's not laggy to begin with
+ (original_active, ) = self.fs.get_active_names()
+ self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active))
+
+ self.mounts[0].umount_wait()
+
+ # Control: that we can mount and unmount usually, while the cluster is healthy
+ self.mounts[0].mount_wait()
+ self.mounts[0].umount_wait()
+
+ # Stop the daemon processes
+ self.fs.mds_stop()
+
+ # Wait for everyone to go laggy
+ def laggy():
+ mdsmap = self.fs.get_mds_map()
+ for info in mdsmap['info'].values():
+ if "laggy_since" not in info:
+ return False
+
+ return True
+
+ self.wait_until_true(laggy, self.fs.beacon_timeout)
+ with self.assertRaises(CommandFailedError):
+ self.mounts[0].mount_wait()
+
+ def test_standby_count_wanted(self):
+ """
+ That cluster health warnings are generated by insufficient standbys available.
+ """
+
+ # Need all my standbys up as well as the active daemons
+ self.wait_for_daemon_start()
+
+ standbys = self.mds_cluster.get_standby_daemons()
+ self.assertGreaterEqual(len(standbys), 1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)))
+
+ # Kill a standby and check for warning
+ victim = standbys.pop()
+ self.fs.mds_stop(victim)
+ self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout)
+
+ # restart the standby, see that it becomes a standby, check health clears
+ self.fs.mds_restart(victim)
+ self.wait_until_true(
+ lambda: victim in self.mds_cluster.get_standby_daemons(),
+ timeout=60 # Approximately long enough for MDS to start and mon to notice
+ )
+ self.wait_for_health_clear(timeout=30)
+
+ # Set it one greater than standbys ever seen
+ standbys = self.mds_cluster.get_standby_daemons()
+ self.assertGreaterEqual(len(standbys), 1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1))
+ self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout)
+
+ # Set it to 0
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
+ self.wait_for_health_clear(timeout=30)
+
+ def test_discontinuous_mdsmap(self):
+ """
+ That discontinuous mdsmap does not affect failover.
+ See http://tracker.ceph.com/issues/24856.
+ """
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.umount_wait()
+
+ monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds"))
+
+ mds_0 = self.fs.get_rank(rank=0, status=status)
+ self.fs.rank_freeze(True, rank=0) # prevent failover
+ self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status)
+ self.wait_until_true(
+ lambda: "laggy_since" in self.fs.get_rank(),
+ timeout=self.fs.beacon_timeout
+ )
+
+ self.fs.rank_fail(rank=1)
+ self.fs.wait_for_state('up:resolve', rank=1, timeout=30)
+
+ # Make sure mds_0's monitor connection gets reset
+ time.sleep(monc_timeout * 2)
+
+ # Continue rank 0, it will get discontinuous mdsmap
+ self.fs.rank_signal(signal.SIGCONT, rank=0)
+ self.wait_until_true(
+ lambda: "laggy_since" not in self.fs.get_rank(rank=0),
+ timeout=self.fs.beacon_timeout
+ )
+
+ # mds.b will be stuck at 'reconnect' state if snapserver gets confused
+ # by discontinuous mdsmap
+ self.fs.wait_for_state('up:active', rank=1, timeout=30)
+ self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
+ self.fs.rank_freeze(False, rank=0)
+
+ def test_connect_bootstrapping(self):
+ self.config_set("mds", "mds_sleep_rank_change", 10000000.0)
+ self.config_set("mds", "mds_connect_bootstrapping", True)
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+ self.fs.rank_fail(rank=0)
+ # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194
+ self.fs.wait_for_daemons()
+
+
+class TestStandbyReplay(CephFSTestCase):
+ CLIENTS_REQUIRED = 0
+ MDSS_REQUIRED = 4
+
+ def _confirm_no_replay(self):
+ status = self.fs.status()
+ _ = len(list(status.get_standbys()))
+ self.assertEqual(0, len(list(self.fs.get_replays(status=status))))
+ return status
+
+ def _confirm_single_replay(self, full=True, status=None, retries=3):
+ status = self.fs.wait_for_daemons(status=status)
+ ranks = sorted(self.fs.get_mds_map(status=status)['in'])
+ replays = list(self.fs.get_replays(status=status))
+ checked_replays = set()
+ for rank in ranks:
+ has_replay = False
+ for replay in replays:
+ if replay['rank'] == rank:
+ self.assertFalse(has_replay)
+ has_replay = True
+ checked_replays.add(replay['gid'])
+ if full and not has_replay:
+ if retries <= 0:
+ raise RuntimeError("rank "+str(rank)+" has no standby-replay follower")
+ else:
+ retries = retries-1
+ time.sleep(2)
+ self.assertEqual(checked_replays, set(info['gid'] for info in replays))
+ return status
+
+ def _check_replay_takeover(self, status, rank=0):
+ replay = self.fs.get_replay(rank=rank, status=status)
+ new_status = self.fs.wait_for_daemons()
+ new_active = self.fs.get_rank(rank=rank, status=new_status)
+ if replay:
+ self.assertEqual(replay['gid'], new_active['gid'])
+ else:
+ # double check takeover came from a standby (or some new daemon via restart)
+ found = False
+ for info in status.get_standbys():
+ if info['gid'] == new_active['gid']:
+ found = True
+ break
+ if not found:
+ for info in status.get_all():
+ self.assertNotEqual(info['gid'], new_active['gid'])
+ return new_status
+
+ def test_standby_replay_singleton(self):
+ """
+ That only one MDS becomes standby-replay.
+ """
+
+ self._confirm_no_replay()
+ self.fs.set_allow_standby_replay(True)
+ time.sleep(30)
+ self._confirm_single_replay()
+
+ def test_standby_replay_damaged(self):
+ """
+ That a standby-replay daemon can cause the rank to go damaged correctly.
+ """
+
+ self._confirm_no_replay()
+ self.config_set("mds", "mds_standby_replay_damaged", True)
+ self.fs.set_allow_standby_replay(True)
+ self.wait_until_true(
+ lambda: len(self.fs.get_damaged()) > 0,
+ timeout=30
+ )
+ status = self.fs.status()
+ self.assertListEqual([], list(self.fs.get_ranks(status=status)))
+ self.assertListEqual([0], self.fs.get_damaged(status=status))
+
+ def test_standby_replay_disable(self):
+ """
+ That turning off allow_standby_replay fails all standby-replay daemons.
+ """
+
+ self._confirm_no_replay()
+ self.fs.set_allow_standby_replay(True)
+ time.sleep(30)
+ self._confirm_single_replay()
+ self.fs.set_allow_standby_replay(False)
+ self._confirm_no_replay()
+
+ def test_standby_replay_singleton_fail(self):
+ """
+ That failures don't violate singleton constraint.
+ """
+
+ self._confirm_no_replay()
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ self.fs.rank_restart(status=status)
+ status = self._check_replay_takeover(status)
+ status = self._confirm_single_replay(status=status)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ self.fs.rank_fail()
+ status = self._check_replay_takeover(status)
+ status = self._confirm_single_replay(status=status)
+
+ def test_standby_replay_singleton_fail_multimds(self):
+ """
+ That failures don't violate singleton constraint with multiple actives.
+ """
+
+ status = self._confirm_no_replay()
+ new_max_mds = randint(2, len(list(status.get_standbys())))
+ self.fs.set_max_mds(new_max_mds)
+ self.fs.wait_for_daemons() # wait for actives to come online!
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay(full=False)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = randint(0, new_max_mds-1)
+ self.fs.rank_restart(rank=victim, status=status)
+ status = self._check_replay_takeover(status, rank=victim)
+ status = self._confirm_single_replay(status=status, full=False)
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = randint(0, new_max_mds-1)
+ self.fs.rank_fail(rank=victim)
+ status = self._check_replay_takeover(status, rank=victim)
+ status = self._confirm_single_replay(status=status, full=False)
+
+ def test_standby_replay_failure(self):
+ """
+ That the failure of a standby-replay daemon happens cleanly
+ and doesn't interrupt anything else.
+ """
+
+ status = self._confirm_no_replay()
+ self.fs.set_max_mds(1)
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ for i in range(10):
+ time.sleep(randint(1, 5))
+ victim = self.fs.get_replay(status=status)
+ self.fs.mds_restart(mds_id=victim['name'])
+ status = self._confirm_single_replay(status=status)
+
+ def test_standby_replay_prepare_beacon(self):
+ """
+ That MDSMonitor::prepare_beacon handles standby-replay daemons
+ correctly without removing the standby. (Note, usually a standby-replay
+ beacon will just be replied to by MDSMonitor::preprocess_beacon.)
+ """
+
+ status = self._confirm_no_replay()
+ self.fs.set_max_mds(1)
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+ replays = list(status.get_replays(self.fs.id))
+ self.assertEqual(len(replays), 1)
+ self.config_set('mds.'+replays[0]['name'], 'mds_inject_health_dummy', True)
+ time.sleep(10) # for something not to happen...
+ status = self._confirm_single_replay()
+ replays2 = list(status.get_replays(self.fs.id))
+ self.assertEqual(replays[0]['gid'], replays2[0]['gid'])
+
+ def test_rank_stopped(self):
+ """
+ That when a rank is STOPPED, standby replays for
+ that rank get torn down
+ """
+
+ status = self._confirm_no_replay()
+ standby_count = len(list(status.get_standbys()))
+ self.fs.set_max_mds(2)
+ self.fs.set_allow_standby_replay(True)
+ status = self._confirm_single_replay()
+
+ self.fs.set_max_mds(1) # stop rank 1
+
+ status = self._confirm_single_replay()
+ self.assertTrue(standby_count, len(list(status.get_standbys())))
+
+
+class TestMultiFilesystems(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 4
+
+ # We'll create our own filesystems and start our own daemons
+ REQUIRE_FILESYSTEM = False
+
+ def setUp(self):
+ super(TestMultiFilesystems, self).setUp()
+ self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
+ "enable_multiple", "true",
+ "--yes-i-really-mean-it")
+
+ def _setup_two(self):
+ fs_a = self.mds_cluster.newfs(name="alpha")
+ fs_b = self.mds_cluster.newfs(name="bravo")
+
+ self.mds_cluster.mds_restart()
+
+ # Wait for both filesystems to go healthy
+ fs_a.wait_for_daemons()
+ fs_b.wait_for_daemons()
+
+ # Reconfigure client auth caps
+ for mount in self.mounts:
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(mount.client_id),
+ 'mds', 'allow',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ fs_a.get_data_pool_name(), fs_b.get_data_pool_name()))
+
+ return fs_a, fs_b
+
+ def test_clients(self):
+ fs_a, fs_b = self._setup_two()
+
+ # Mount a client on fs_a
+ self.mount_a.mount_wait(cephfs_name=fs_a.name)
+ self.mount_a.write_n_mb("pad.bin", 1)
+ self.mount_a.write_n_mb("test.bin", 2)
+ a_created_ino = self.mount_a.path_to_ino("test.bin")
+ self.mount_a.create_files()
+
+ # Mount a client on fs_b
+ self.mount_b.mount_wait(cephfs_name=fs_b.name)
+ self.mount_b.write_n_mb("test.bin", 1)
+ b_created_ino = self.mount_b.path_to_ino("test.bin")
+ self.mount_b.create_files()
+
+ # Check that a non-default filesystem mount survives an MDS
+ # failover (i.e. that map subscription is continuous, not
+ # just the first time), reproduces #16022
+ old_fs_b_mds = fs_b.get_active_names()[0]
+ self.mds_cluster.mds_stop(old_fs_b_mds)
+ self.mds_cluster.mds_fail(old_fs_b_mds)
+ fs_b.wait_for_daemons()
+ background = self.mount_b.write_background()
+ # Raise exception if the write doesn't finish (i.e. if client
+ # has not kept up with MDS failure)
+ try:
+ self.wait_until_true(lambda: background.finished, timeout=30)
+ except RuntimeError:
+ # The mount is stuck, we'll have to force it to fail cleanly
+ background.stdin.close()
+ self.mount_b.umount_wait(force=True)
+ raise
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # See that the client's files went into the correct pool
+ self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024))
+ self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024))
+
+ def test_standby(self):
+ fs_a, fs_b = self._setup_two()
+
+ # Assert that the remaining two MDS daemons are now standbys
+ a_daemons = fs_a.get_active_names()
+ b_daemons = fs_b.get_active_names()
+ self.assertEqual(len(a_daemons), 1)
+ self.assertEqual(len(b_daemons), 1)
+ original_a = a_daemons[0]
+ original_b = b_daemons[0]
+ expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons))
+
+ # Need all my standbys up as well as the active daemons
+ self.wait_for_daemon_start()
+ self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons())
+
+ # Kill fs_a's active MDS, see a standby take over
+ self.mds_cluster.mds_stop(original_a)
+ self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 1)
+ # Assert that it's a *different* daemon that has now appeared in the map for fs_a
+ self.assertNotEqual(fs_a.get_active_names()[0], original_a)
+
+ # Kill fs_b's active MDS, see a standby take over
+ self.mds_cluster.mds_stop(original_b)
+ self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 1)
+ # Assert that it's a *different* daemon that has now appeared in the map for fs_b
+ self.assertNotEqual(fs_b.get_active_names()[0], original_b)
+
+ # Both of the original active daemons should be gone, and all standbys used up
+ self.assertEqual(self.mds_cluster.get_standby_daemons(), set())
+
+ # Restart the ones I killed, see them reappear as standbys
+ self.mds_cluster.mds_restart(original_a)
+ self.mds_cluster.mds_restart(original_b)
+ self.wait_until_true(
+ lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(),
+ timeout=30
+ )
+
+ def test_grow_shrink(self):
+ # Usual setup...
+ fs_a, fs_b = self._setup_two()
+
+ # Increase max_mds on fs_b, see a standby take up the role
+ fs_b.set_max_mds(2)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Increase max_mds on fs_a, see a standby take up the role
+ fs_a.set_max_mds(2)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Shrink fs_b back to 1, see a daemon go back to standby
+ fs_b.set_max_mds(1)
+ self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ # Grow fs_a up to 3, see the former fs_b daemon join it.
+ fs_a.set_max_mds(3)
+ self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60,
+ reject_fn=lambda v: v > 3 or v < 2)
diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py
new file mode 100644
index 000000000..17cb84970
--- /dev/null
+++ b/qa/tasks/cephfs/test_flush.py
@@ -0,0 +1,112 @@
+
+from textwrap import dedent
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+
+
+class TestFlush(CephFSTestCase):
+ def test_flush(self):
+ self.mount_a.run_shell(["mkdir", "mydir"])
+ self.mount_a.run_shell(["touch", "mydir/alpha"])
+ dir_ino = self.mount_a.path_to_ino("mydir")
+ file_ino = self.mount_a.path_to_ino("mydir/alpha")
+
+ # Unmount the client so that it isn't still holding caps
+ self.mount_a.umount_wait()
+
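+ # (For orientation: a directory fragment is stored as a RADOS object named
+ # "<ino in hex>.00000000" in the metadata pool, and a file's backtrace is an
+ # xattr on the similarly named first object in the data pool; list_dirfrag()
+ # and read_backtrace() below look those objects up directly.)
+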
+ # Before flush, the dirfrag object does not exist
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(dir_ino)
+
+ # Before flush, the file's backtrace has not been written
+ with self.assertRaises(ObjectNotFound):
+ self.fs.read_backtrace(file_ino)
+
+ # Before flush, there are no dentries in the root
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
+
+ # Execute flush
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+
+ # After flush, the dirfrag object has been created
+ dir_list = self.fs.list_dirfrag(dir_ino)
+ self.assertEqual(dir_list, ["alpha_head"])
+
+ # And the 'mydir' dentry is in the root
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head'])
+
+ # ...and the data object has its backtrace
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']])
+ self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']])
+ self.assertEqual(file_ino, backtrace['ino'])
+
+ # ...and the journal is truncated to just a single subtreemap from the
+ # newly created segment
+ summary_output = self.fs.journal_tool(["event", "get", "summary"], 0)
+ try:
+ self.assertEqual(summary_output,
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ Errors: 0
+ """
+ ).strip())
+ except AssertionError:
+ # In some states, flushing the journal will leave you
+ # an extra event from locks a client held. This is
+ # correct behaviour: the MDS is flushing the journal,
+ # it's just that new events are getting added too.
+ # In this case, we should nevertheless see a fully
+ # empty journal after a second flush.
+ self.assertEqual(summary_output,
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ UPDATE: 1
+ Errors: 0
+ """
+ ).strip())
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+ self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0),
+ dedent(
+ """
+ Events by type:
+ SUBTREEMAP: 1
+ Errors: 0
+ """
+ ).strip())
+
+ # Now for deletion!
+ # We will count the RADOS deletions and MDS file purges, to verify that
+ # the expected behaviour is happening as a result of the purge
+ initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete']
+ initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued']
+
+ # Use a client to delete a file
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["rm", "-rf", "mydir"])
+
+ # Flush the journal so that the directory inode can be purged
+ flush_data = self.fs.mds_asok(["flush", "journal"])
+ self.assertEqual(flush_data['return_code'], 0)
+
+ # We expect at least two strays to be enqueued for purge (the file and its parent directory)
+ self.wait_until_true(
+ lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2,
+ 60)
+
+ # We expect two deletions, one of the dirfrag and one of the backtrace
+ self.wait_until_true(
+ lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2,
+ 60) # timeout is fairly long to allow for tick+rados latencies
+
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(dir_ino)
+ with self.assertRaises(ObjectNotFound):
+ self.fs.read_backtrace(file_ino)
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py
new file mode 100644
index 000000000..f3cec881b
--- /dev/null
+++ b/qa/tasks/cephfs/test_forward_scrub.py
@@ -0,0 +1,307 @@
+
+"""
+Test that the forward scrub functionality can traverse metadata and apply
+requested tags, on well formed metadata.
+
+This is *not* the real testing for forward scrub, which will need to test
+how the functionality responds to damaged metadata.
+
+"""
+import logging
+import json
+
+from collections import namedtuple
+from io import BytesIO
+from textwrap import dedent
+
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+import struct
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class TestForwardScrub(CephFSTestCase):
+ MDSS_REQUIRED = 1
+
+ def _read_str_xattr(self, pool, obj, attr):
+ """
+ Read a ceph-encoded string from a rados xattr
+ """
+ output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool,
+ stdout=BytesIO()).stdout.getvalue()
+ strlen = struct.unpack('i', output[0:4])[0]
+ return output[4:(4 + strlen)].decode(encoding='ascii')
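+ # (The value is a ceph-encoded string: a 32-bit little-endian length followed
+ # by the raw bytes, e.g. b'\x05\x00\x00\x00mytag' decodes to "mytag". The
+ # struct.unpack('i', ...) above assumes a little-endian host.)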
+
+ def _get_paths_to_ino(self):
+ inos = {}
+ p = self.mount_a.run_shell(["find", "./"])
+ paths = p.stdout.getvalue().strip().split()
+ for path in paths:
+ inos[path] = self.mount_a.path_to_ino(path)
+
+ return inos
+
+ def test_apply_tag(self):
+ self.mount_a.run_shell(["mkdir", "parentdir"])
+ self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
+ self.mount_a.run_shell(["touch", "rfile"])
+ self.mount_a.run_shell(["touch", "parentdir/pfile"])
+ self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"])
+
+ # Build a structure mapping path to inode, as we will later want
+ # to check object by object and objects are named after ino number
+ inos = self._get_paths_to_ino()
+
+ # Flush metadata: this is a friendly test of forward scrub so we're skipping
+ # the part where it's meant to cope with dirty metadata
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ tag = "mytag"
+
+ # Execute tagging forward scrub
+ self.fs.mds_asok(["tag", "path", "/parentdir", tag])
+ # Wait for completion
+ import time
+ time.sleep(10)
+ # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll
+ # watch that instead
+
+ # Check that dirs were tagged
+ for dirpath in ["./parentdir", "./parentdir/childdir"]:
+ self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name())
+
+ # Check that files were tagged
+ for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]:
+ self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name())
+
+ # This guy wasn't in the tag path, shouldn't have been tagged
+ self.assertUntagged(inos["./rfile"])
+
+ def assertUntagged(self, ino):
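+        # Data objects are named "<ino hex>.<object index>"; the scrub tag
+        # xattr would live on the inode's first object.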
+ file_obj_name = "{0:x}.00000000".format(ino)
+ with self.assertRaises(CommandFailedError):
+ self._read_str_xattr(
+ self.fs.get_data_pool_name(),
+ file_obj_name,
+ "scrub_tag"
+ )
+
+ def assertTagged(self, ino, tag, pool):
+ file_obj_name = "{0:x}.00000000".format(ino)
+ wrote = self._read_str_xattr(
+ pool,
+ file_obj_name,
+ "scrub_tag"
+ )
+ self.assertEqual(wrote, tag)
+
+ def _validate_linkage(self, expected):
+ inos = self._get_paths_to_ino()
+ try:
+ self.assertDictEqual(inos, expected)
+ except AssertionError:
+ log.error("Expected: {0}".format(json.dumps(expected, indent=2)))
+ log.error("Actual: {0}".format(json.dumps(inos, indent=2)))
+ raise
+
+ def test_orphan_scan(self):
+ # Create some files whose metadata we will flush
+ self.mount_a.run_python(dedent("""
+ import os
+ mount_point = "{mount_point}"
+ parent = os.path.join(mount_point, "parent")
+ os.mkdir(parent)
+ flushed = os.path.join(parent, "flushed")
+ os.mkdir(flushed)
+ for f in ["alpha", "bravo", "charlie"]:
+ open(os.path.join(flushed, f), 'w').write(f)
+ """.format(mount_point=self.mount_a.mountpoint)))
+
+ inos = self._get_paths_to_ino()
+
+ # Flush journal
+ # Umount before flush to avoid cap releases putting
+ # things we don't want in the journal later.
+ self.mount_a.umount_wait()
+ self.fs.flush()
+
+        # Create a new inode that's just in the log, i.e. would
+        # look orphaned to backward scan if backward scan were not
+        # respecting the scrub_tag xattr.
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["mkdir", "parent/unflushed"])
+ self.mount_a.run_shell(["dd", "if=/dev/urandom",
+ "of=./parent/unflushed/jfile",
+ "bs=1M", "count=8"])
+ inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed")
+ inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile")
+ self.mount_a.umount_wait()
+
+ # Orphan an inode by deleting its dentry
+        # Our victim will be bravo.
+ self.mount_a.umount_wait()
+ self.fs.fail()
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
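+        # The dirfrag object stores dentries as omap keys named "<name>_head",
+        # so removing bravo's key orphans its inode.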
+ frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"])
+ self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"])
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ # See that the orphaned file is indeed missing from a client's POV
+ self.mount_a.mount_wait()
+ damaged_state = self._get_paths_to_ino()
+ self.assertNotIn("./parent/flushed/bravo", damaged_state)
+ self.mount_a.umount_wait()
+
+ # Run a tagging forward scrub
+ tag = "mytag123"
+ self.fs.rank_asok(["tag", "path", "/parent", tag])
+
+        # See that the orphan was not tagged
+ self.assertUntagged(inos['./parent/flushed/bravo'])
+
+ # See that the flushed-metadata-and-still-present files are tagged
+ self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name())
+ self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name())
+
+ # See that journalled-but-not-flushed file *was* tagged
+ self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name())
+
+ # okay, now we are going to run cephfs-data-scan. It's necessary to
+ # have a clean journal otherwise replay will blowup on mismatched
+ # inotable versions (due to scan_links)
+ self.fs.flush()
+ self.fs.fail()
+ self.fs.journal_tool(["journal", "reset", "--force"], 0)
+
+ # Run cephfs-data-scan targeting only orphans
+ self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
+ self.fs.data_scan([
+ "scan_inodes",
+ "--filter-tag", tag,
+ self.fs.get_data_pool_name()
+ ])
+ self.fs.data_scan(["scan_links"])
+
+ # After in-place injection stats should be kosher again
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', True)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True)
+
+ # And we should have all the same linkage we started with,
+ # and no lost+found, and no extra inodes!
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+ self._validate_linkage(inos)
+
+ def _stash_inotable(self):
+ # Get all active ranks
+ ranks = self.fs.get_all_mds_rank()
+
+ inotable_dict = {}
+ for rank in ranks:
+ inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable"
+ print("Trying to fetch inotable object: " + inotable_oid)
+
+ #self.fs.get_metadata_object("InoTable", "mds0_inotable")
+ inotable_raw = self.fs.radosmo(['get', inotable_oid, '-'])
+ inotable_dict[inotable_oid] = inotable_raw
+ return inotable_dict
+
+ def test_inotable_sync(self):
+ self.mount_a.write_n_mb("file1_sixmegs", 6)
+
+ # Flush journal
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ inotable_copy = self._stash_inotable()
+
+ self.mount_a.mount_wait()
+
+ self.mount_a.write_n_mb("file2_sixmegs", 6)
+ self.mount_a.write_n_mb("file3_sixmegs", 6)
+
+ inos = self._get_paths_to_ino()
+
+ # Flush journal
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ self.mount_a.umount_wait()
+
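+        # With the inotable still consistent, a repair scrub should find
+        # nothing to fix (hence invert_match on the log assertion).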
+ with self.assert_cluster_log("inode table repaired", invert_match=True):
+ out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ self.fs.fail()
+
+ # Truncate the journal (to ensure the inotable on disk
+ # is all that will be in the InoTable in memory)
+
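+        # Splice out the journal events for file2/file3 so that replay does
+        # not reintroduce their inos once the inotable is reverted below.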
+ self.fs.journal_tool(["event", "splice",
+ "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0)
+
+ self.fs.journal_tool(["event", "splice",
+ "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0)
+
+ # Revert to old inotable.
+ for key, value in inotable_copy.items():
+ self.fs.radosm(["put", key, "-"], stdin=BytesIO(value))
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
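+        # The reverted inotable no longer accounts for the later allocations,
+        # so this scrub should detect the mismatch and repair the table.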
+ with self.assert_cluster_log("inode table repaired"):
+ out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ self.fs.fail()
+ table_text = self.fs.table_tool(["0", "show", "inode"])
+ table = json.loads(table_text)
+ self.assertGreater(
+ table['0']['data']['inotable']['free'][0]['start'],
+ inos['./file3_sixmegs'])
+
+ def test_backtrace_repair(self):
+ """
+        That the MDS can repair an inode's backtrace in the data pool
+ if it is found to be damaged.
+ """
+ # Create a file for subsequent checks
+ self.mount_a.run_shell(["mkdir", "parent_a"])
+ self.mount_a.run_shell(["touch", "parent_a/alpha"])
+ file_ino = self.mount_a.path_to_ino("parent_a/alpha")
+
+ # That backtrace and layout are written after initial flush
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'],
+ [a['dname'] for a in backtrace['ancestors']])
+
+ # Go corrupt the backtrace
+ self.fs._write_data_xattr(file_ino, "parent",
+ "oh i'm sorry did i overwrite your xattr?")
+
+ with self.assert_cluster_log("bad backtrace on inode"):
+ out_json = self.fs.run_scrub(["start", "/", "repair,recursive"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['alpha', 'parent_a'],
+ [a['dname'] for a in backtrace['ancestors']])
diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py
new file mode 100644
index 000000000..7d35ec0df
--- /dev/null
+++ b/qa/tasks/cephfs/test_fragment.py
@@ -0,0 +1,359 @@
+from io import StringIO
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.orchestra import run
+
+import os
+import time
+import logging
+log = logging.getLogger(__name__)
+
+
+class TestFragmentation(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def get_splits(self):
+ return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split']
+
+ def get_merges(self):
+ return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge']
+
+ def get_dir_ino(self, path):
+ dir_cache = self.fs.read_cache(path, 0)
+ dir_ino = None
+ dir_inono = self.mount_a.path_to_ino(path.strip("/"))
+ for ino in dir_cache:
+ if ino['ino'] == dir_inono:
+ dir_ino = ino
+ break
+ self.assertIsNotNone(dir_ino)
+ return dir_ino
+
+ def _configure(self, **kwargs):
+ """
+        Apply kwargs as MDS configuration settings and restart the MDSs
+        so that the new values take effect.
+ """
+
+ for k, v in kwargs.items():
+            self.ceph_cluster.set_ceph_conf("mds", k, str(v))
+
+ self.mds_cluster.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ def test_oversize(self):
+ """
+ That a directory is split when it becomes too large.
+ """
+
+ split_size = 20
+ merge_size = 5
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=1
+ )
+
+ self.assertEqual(self.get_splits(), 0)
+
+ self.mount_a.create_n_files("splitdir/file", split_size + 1)
+
+ self.wait_until_true(
+ lambda: self.get_splits() == 1,
+ timeout=30
+ )
+
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ self.assertEqual(len(frags), 2)
+ self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*")
+ self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*")
+ self.assertEqual(
+ sum([len(f['dentries']) for f in frags]),
+ split_size + 1
+ )
+
+ self.assertEqual(self.get_merges(), 0)
+
+ self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")])
+
+ self.wait_until_true(
+ lambda: self.get_merges() == 1,
+ timeout=30
+ )
+
+ self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1)
+
+ def test_rapid_creation(self):
+ """
+        That the fast-splitting limit of 1.5x the normal split size is
+        applied when dentries are created quickly.
+ """
+
+ split_size = 100
+ merge_size = 1
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=3,
+ mds_bal_fragment_size_max=int(split_size * 1.5 + 2)
+ )
+
+ # We test this only at a single split level. If a client was sending
+ # IO so fast that it hit a second split before the first split
+ # was complete, it could violate mds_bal_fragment_size_max -- there
+ # is a window where the child dirfrags of a split are unfrozen
+ # (so they can grow), but still have STATE_FRAGMENTING (so they
+ # can't be split).
+
+ # By writing 4x the split size when the split bits are set
+ # to 3 (i.e. 4-ways), I am reasonably sure to see precisely
+ # one split. The test is to check whether that split
+ # happens soon enough that the client doesn't exceed
+ # 2x the split_size (the "immediate" split mode should
+ # kick in at 1.5x the split size).
+
+ self.assertEqual(self.get_splits(), 0)
+ self.mount_a.create_n_files("splitdir/file", split_size * 4)
+ self.wait_until_equal(
+ self.get_splits,
+ 1,
+ reject_fn=lambda s: s > 1,
+ timeout=30
+ )
+
+ def test_deep_split(self):
+ """
+ That when the directory grows many times larger than split size,
+ the fragments get split again.
+ """
+
+ split_size = 100
+        merge_size = 1  # i.e. don't merge a frag unless it's empty
+ split_bits = 1
+
+ branch_factor = 2**split_bits
+
+ # Arbitrary: how many levels shall we try fragmenting before
+ # ending the test?
+ max_depth = 5
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=split_bits
+ )
+
+ # Each iteration we will create another level of fragments. The
+ # placement of dentries into fragments is by hashes (i.e. pseudo
+ # random), so we rely on statistics to get the behaviour that
+ # by writing about 1.5x as many dentries as the split_size times
+ # the number of frags, we will get them all to exceed their
+ # split size and trigger a split.
+ depth = 0
+ files_written = 0
+ splits_expected = 0
+ while depth < max_depth:
+ log.info("Writing files for depth {0}".format(depth))
+ target_files = branch_factor**depth * int(split_size * 1.5)
+ create_files = target_files - files_written
+
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "{0} Writing {1} files (depth={2})".format(
+ self.__class__.__name__, create_files, depth
+ ))
+ self.mount_a.create_n_files("splitdir/file_{0}".format(depth),
+ create_files)
+ self.ceph_cluster.mon_manager.raw_cluster_cmd("log",
+ "{0} Done".format(self.__class__.__name__))
+
+ files_written += create_files
+ log.info("Now have {0} files".format(files_written))
+
+ splits_expected += branch_factor**depth
+ log.info("Waiting to see {0} splits".format(splits_expected))
+ try:
+ self.wait_until_equal(
+ self.get_splits,
+ splits_expected,
+ timeout=30,
+ reject_fn=lambda x: x > splits_expected
+ )
+
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ self.assertEqual(len(frags), branch_factor**(depth+1))
+ self.assertEqual(
+ sum([len(f['dentries']) for f in frags]),
+ target_files
+ )
+ except:
+ # On failures, log what fragmentation we actually ended
+ # up with. This block is just for logging, at the end
+ # we raise the exception again.
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ log.info("depth={0} splits_expected={1} files_written={2}".format(
+ depth, splits_expected, files_written
+ ))
+ log.info("Dirfrags:")
+ for f in frags:
+ log.info("{0}: {1}".format(
+ f['dirfrag'], len(f['dentries'])
+ ))
+ raise
+
+ depth += 1
+
+ # Remember the inode number because we will be checking for
+ # objects later.
+ dir_inode_no = self.mount_a.path_to_ino("splitdir")
+
+ self.mount_a.run_shell(["rm", "-rf", "splitdir/"])
+ self.mount_a.umount_wait()
+
+ self.fs.mds_asok(['flush', 'journal'])
+
+ def _check_pq_finished():
+ num_strays = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['num_strays']
+ pq_ops = self.fs.mds_asok(['perf', 'dump', 'purge_queue'])['purge_queue']['pq_executing']
+ return num_strays == 0 and pq_ops == 0
+
+ # Wait for all strays to purge
+ self.wait_until_true(
+ lambda: _check_pq_finished(),
+ timeout=1200
+ )
+ # Check that the metadata pool objects for all the myriad
+ # child fragments are gone
+ metadata_objs = self.fs.radosmo(["ls"], stdout=StringIO()).strip()
+ frag_objs = []
+ for o in metadata_objs.split("\n"):
+ if o.startswith("{0:x}.".format(dir_inode_no)):
+ frag_objs.append(o)
+ self.assertListEqual(frag_objs, [])
+
+ def test_split_straydir(self):
+ """
+ That stray dir is split when it becomes too large.
+ """
+ def _count_fragmented():
+ mdsdir_cache = self.fs.read_cache("~mdsdir", 1)
+ num = 0
+ for ino in mdsdir_cache:
+ if ino["ino"] == 0x100:
+ continue
+ if len(ino["dirfrags"]) > 1:
+ log.info("straydir 0x{:X} is fragmented".format(ino["ino"]))
+                    num += 1
+ return num
+
+ split_size = 50
+ merge_size = 5
+ split_bits = 1
+
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=split_bits,
+ mds_bal_fragment_size_max=(split_size * 100)
+ )
+
+ # manually split/merge
+ self.assertEqual(_count_fragmented(), 0)
+ self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray8", "0/0", "1"])
+ self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray9", "0/0", "1"])
+ self.wait_until_true(
+ lambda: _count_fragmented() == 2,
+ timeout=30
+ )
+
+ time.sleep(30)
+
+ self.fs.mds_asok(["dirfrag", "merge", "~mdsdir/stray8", "0/0"])
+ self.wait_until_true(
+ lambda: _count_fragmented() == 1,
+ timeout=30
+ )
+
+ time.sleep(30)
+
+ # auto merge
+
+ # merging stray dirs is driven by MDCache::advance_stray()
+ # advance stray dir 10 times
+ for _ in range(10):
+ self.fs.mds_asok(['flush', 'journal'])
+
+ self.wait_until_true(
+ lambda: _count_fragmented() == 0,
+ timeout=30
+ )
+
+ # auto split
+
+ # there are 10 stray dirs. advance stray dir 20 times
+ self.mount_a.create_n_files("testdir1/file", split_size * 20)
+ self.mount_a.run_shell(["mkdir", "testdir2"])
+ testdir1_path = os.path.join(self.mount_a.mountpoint, "testdir1")
+ for i in self.mount_a.ls(testdir1_path):
+ self.mount_a.run_shell(["ln", "testdir1/{0}".format(i), "testdir2/"])
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+ self.mount_a.wait_until_mounted()
+
+ # flush journal and restart mds. after restart, testdir2 is not in mds' cache
+ self.fs.mds_asok(['flush', 'journal'])
+ self.mds_cluster.mds_fail_restart()
+ self.fs.wait_for_daemons()
+ # splitting stray dirs is driven by MDCache::advance_stray()
+        # advance stray dir after unlinking 'split_size' files.
+ self.fs.mds_asok(['config', 'set', 'mds_log_events_per_segment', str(split_size)])
+
+ self.assertEqual(_count_fragmented(), 0)
+ self.mount_a.run_shell(["rm", "-rf", "testdir1"])
+ self.wait_until_true(
+ lambda: _count_fragmented() > 0,
+ timeout=30
+ )
+
+ def test_dir_merge_with_snap_items(self):
+ """
+        That a directory remains fragmented when snapshot items are taken into account.
+ """
+ split_size = 1000
+ merge_size = 100
+ self._configure(
+ mds_bal_split_size=split_size,
+ mds_bal_merge_size=merge_size,
+ mds_bal_split_bits=1
+ )
+
+ # split the dir
+ create_files = split_size + 50
+ self.mount_a.create_n_files("splitdir/file_", create_files)
+
+ self.wait_until_true(
+ lambda: self.get_splits() == 1,
+ timeout=30
+ )
+
+ frags = self.get_dir_ino("/splitdir")['dirfrags']
+ self.assertEqual(len(frags), 2)
+ self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*")
+ self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*")
+ self.assertEqual(
+ sum([len(f['dentries']) for f in frags]), create_files
+ )
+
+ self.assertEqual(self.get_merges(), 0)
+
+ self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_a"])
+ self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_b"])
+ self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")])
+
+ time.sleep(30)
+
+ self.assertEqual(self.get_merges(), 0)
+ self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 2)
diff --git a/qa/tasks/cephfs/test_fscrypt.py b/qa/tasks/cephfs/test_fscrypt.py
new file mode 100644
index 000000000..11dd2038f
--- /dev/null
+++ b/qa/tasks/cephfs/test_fscrypt.py
@@ -0,0 +1,77 @@
+from logging import getLogger
+
+from io import StringIO
+from tasks.cephfs.xfstests_dev import XFSTestsDev
+
+
+log = getLogger(__name__)
+
+
+class TestFscrypt(XFSTestsDev):
+
+ def setup_xfsprogs_devs(self):
+ self.install_xfsprogs = True
+
+ def require_kernel_mount(self):
+ from tasks.cephfs.fuse_mount import FuseMount
+ from tasks.cephfs.kernel_mount import KernelMount
+
+        # TODO: make xfstests-dev compatible with ceph-fuse. xfstests-dev
+        # remounts CephFS with the kernel client before running tests, so
+        # ceph-fuse mounts are never actually tested.
+ if isinstance(self.mount_a, FuseMount):
+ self.skipTest('Requires kernel client; xfstests-dev not '\
+ 'compatible with ceph-fuse ATM.')
+ elif isinstance(self.mount_a, KernelMount):
+ log.info('client is kernel mounted')
+
+ def test_fscrypt_encrypt(self):
+ self.require_kernel_mount()
+
+ # XXX: check_status is set to False so that we can check for command's
+        # failure on our own (since this command doesn't set the right error code
+ # and error message in some cases) and print custom log messages
+ # accordingly.
+ proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0',
+ './check', '-g', 'encrypt'], cwd=self.xfstests_repo_path, stdout=StringIO(),
+ stderr=StringIO(), timeout=900, check_status=False, omit_sudo=False,
+ label='running tests for encrypt from xfstests-dev')
+
+ if proc.returncode != 0:
+ log.info('Command failed.')
+ log.info(f'Command return value: {proc.returncode}')
+ stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue()
+ log.info(f'Command stdout -\n{stdout}')
+ log.info(f'Command stderr -\n{stderr}')
+
+        # Currently only tests 395, 396, 397, 421, 429, 435, 440, 580, 593,
+        # 595 and 598 of the 26 test cases will actually be run; all the
+        # others will be skipped for now because of features not yet
+        # supported by the kernel or kceph.
+ self.assertEqual(proc.returncode, 0)
+ self.assertIn('Passed all 26 tests', stdout)
+
+ def test_fscrypt_dummy_encryption_with_quick_group(self):
+ self.require_kernel_mount()
+
+ self.write_local_config('test_dummy_encryption')
+
+ # XXX: check_status is set to False so that we can check for command's
+        # failure on our own (since this command doesn't set the right error code
+        # and error message in some cases) and print custom log messages
+        # accordingly. This run takes a long time, so the timeout is set to 3 hours.
+ proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0',
+ './check', '-g', 'quick', '-E', './ceph.exclude'], cwd=self.xfstests_repo_path,
+ stdout=StringIO(), stderr=StringIO(), timeout=10800, check_status=False,
+ omit_sudo=False, label='running tests for dummy_encryption from xfstests-dev')
+
+ if proc.returncode != 0:
+ log.info('Command failed.')
+ log.info(f'Command return value: {proc.returncode}')
+ stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue()
+ log.info(f'Command stdout -\n{stdout}')
+ log.info(f'Command stderr -\n{stderr}')
+
+ # Currently, many test cases will be skipped due to unsupported features,
+        # but the run will still be marked as successful.
+ self.assertEqual(proc.returncode, 0)
+ self.assertIn('Passed all ', stdout)
diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py
new file mode 100644
index 000000000..ed76eaac2
--- /dev/null
+++ b/qa/tasks/cephfs/test_fstop.py
@@ -0,0 +1,114 @@
+import logging
+import json
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while
+
+log = logging.getLogger(__name__)
+
+
+class TestFSTop(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ def setUp(self):
+ super(TestFSTop, self).setUp()
+ self._enable_mgr_stats_plugin()
+
+ def tearDown(self):
+ self._disable_mgr_stats_plugin()
+ super(TestFSTop, self).tearDown()
+
+ def _enable_mgr_stats_plugin(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats")
+
+ def _disable_mgr_stats_plugin(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats")
+
+ def _fstop_dump(self, *args):
+ return self.mount_a.run_shell(['cephfs-top',
+ '--id=admin',
+ *args]).stdout.getvalue()
+
+ def _get_metrics(self, verifier_callback, trials, *args):
+ metrics = None
+ done = False
+ with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed:
+ while proceed():
+ metrics = json.loads(self._fstop_dump(*args))
+ done = verifier_callback(metrics)
+ if done:
+ break
+ return done, metrics
+
+ # TESTS
+ def test_fstop_non_existent_cluster(self):
+ try:
+ self.mount_a.run_shell(['cephfs-top',
+ '--cluster=hpec',
+ '--id=admin',
+ '--selftest'])
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError('expected cephfs-top command to fail.')
+
+ def test_fstop(self):
+ try:
+ self.mount_a.run_shell(['cephfs-top',
+ '--id=admin',
+ '--selftest'])
+ except CommandFailedError:
+ raise RuntimeError('cephfs-top --selftest failed')
+
+ def test_dump(self):
+ """
+ Tests 'cephfs-top --dump' output is valid
+ """
+ def verify_fstop_metrics(metrics):
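+            # both mounted clients should appear under this filesystem's entry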
+ clients = metrics.get(self.fs.name, {})
+ if str(self.mount_a.get_global_id()) in clients and \
+ str(self.mount_b.get_global_id()) in clients:
+ return True
+ return False
+
+ # validate
+ valid, metrics = self._get_metrics(verify_fstop_metrics, 30, '--dump')
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ def test_dumpfs(self):
+ """
+ Tests 'cephfs-top --dumpfs' output is valid
+ """
+ newfs_name = "cephfs_b"
+
+ def verify_fstop_metrics(metrics):
+ clients = metrics.get(newfs_name, {})
+ if self.fs.name not in metrics and \
+ str(self.mount_b.get_global_id()) in clients:
+ return True
+ return False
+
+ # umount mount_b, mount another filesystem on it and use --dumpfs filter
+ self.mount_b.umount_wait()
+
+ self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", "enable_multiple", "true",
+ "--yes-i-really-mean-it")
+
+ # create a new filesystem
+ fs_b = self.mds_cluster.newfs(name=newfs_name)
+
+ # mount cephfs_b on mount_b
+ self.mount_b.mount_wait(cephfs_name=fs_b.name)
+
+ # validate
+ valid, metrics = self._get_metrics(verify_fstop_metrics, 30,
+ '--dumpfs={}'.format(newfs_name))
+ log.debug("metrics={0}".format(metrics))
+
+ # restore mount_b
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.fs.name)
+
+ self.assertTrue(valid)
diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py
new file mode 100644
index 000000000..2b3a7d5f9
--- /dev/null
+++ b/qa/tasks/cephfs/test_full.py
@@ -0,0 +1,398 @@
+import json
+import logging
+import os
+from textwrap import dedent
+from typing import Optional
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+
+log = logging.getLogger(__name__)
+
+
+class FullnessTestCase(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ # Subclasses define whether they're filling whole cluster or just data pool
+ data_only = False
+
+ # Subclasses define how many bytes should be written to achieve fullness
+ pool_capacity: Optional[int] = None
+ fill_mb = None
+
+ def is_full(self):
+ return self.fs.is_full()
+
+ def setUp(self):
+ CephFSTestCase.setUp(self)
+
+ mds_status = self.fs.rank_asok(["status"])
+
+ # Capture the initial OSD map epoch for later use
+ self.initial_osd_epoch = mds_status['osdmap_epoch_barrier']
+
+ def test_barrier(self):
+ """
+ That when an OSD epoch barrier is set on an MDS, subsequently
+ issued capabilities cause clients to update their OSD map to that
+ epoch.
+ """
+
+        # Script that syncs the client up with the MDS OSD map barrier. The
+        # barrier should be updated by the cap flush ack message.
+ pyscript = dedent("""
+ import os
+ fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600)
+ os.fchmod(fd, 0O666)
+ os.fsync(fd)
+ os.close(fd)
+ """)
+
+ # Sync up client with initial MDS OSD map barrier.
+ path = os.path.join(self.mount_a.mountpoint, "foo")
+ self.mount_a.run_python(pyscript.format(path=path))
+
+ # Grab mounts' initial OSD epochs: later we will check that
+ # it hasn't advanced beyond this point.
+ mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch()
+
+ # Freshly mounted at start of test, should be up to date with OSD map
+ self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch)
+
+ # Set and unset a flag to cause OSD epoch to increment
+ self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause")
+ self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause")
+
+ out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip()
+ new_epoch = json.loads(out)['epoch']
+ self.assertNotEqual(self.initial_osd_epoch, new_epoch)
+
+ # Do a metadata operation on clients, witness that they end up with
+ # the old OSD map from startup time (nothing has prompted client
+ # to update its map)
+ path = os.path.join(self.mount_a.mountpoint, "foo")
+ self.mount_a.run_python(pyscript.format(path=path))
+ mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
+ self.assertEqual(mount_a_epoch, mount_a_initial_epoch)
+ self.assertEqual(mount_a_barrier, mount_a_initial_barrier)
+
+ # Set a barrier on the MDS
+ self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()])
+
+ # Sync up client with new MDS OSD map barrier
+ path = os.path.join(self.mount_a.mountpoint, "baz")
+ self.mount_a.run_python(pyscript.format(path=path))
+ mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch()
+ self.assertEqual(mount_a_barrier, new_epoch)
+
+ # Some time passes here because the metadata part of the operation
+ # completes immediately, while the resulting OSD map update happens
+ # asynchronously (it's an Objecter::_maybe_request_map) as a result
+ # of seeing the new epoch barrier.
+ self.wait_until_true(
+ lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch,
+ timeout=30)
+
+ def _data_pool_name(self):
+ data_pool_names = self.fs.get_data_pool_names()
+ if len(data_pool_names) > 1:
+ raise RuntimeError("This test can't handle multiple data pools")
+ else:
+ return data_pool_names[0]
+
+ def _test_full(self, easy_case):
+ """
+ - That a client trying to write data to a file is prevented
+ from doing so with an -EFULL result
+ - That they are also prevented from creating new files by the MDS.
+ - That they may delete another file to get the system healthy again
+
+ :param easy_case: if true, delete a successfully written file to
+ free up space. else, delete the file that experienced
+ the failed write.
+ """
+
+ osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
+
+ log.info("Writing {0}MB should fill this cluster".format(self.fill_mb))
+
+ # Fill up the cluster. This dd may or may not fail, as it depends on
+ # how soon the cluster recognises its own fullness
+ self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2)
+ try:
+ self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2)
+ except CommandFailedError:
+ log.info("Writing file B failed (full status happened already)")
+ assert self.is_full()
+ else:
+ log.info("Writing file B succeeded (full status will happen soon)")
+ self.wait_until_true(lambda: self.is_full(),
+ timeout=osd_mon_report_interval * 120)
+
+ # Attempting to write more data should give me ENOSPC
+ with self.assertRaises(CommandFailedError) as ar:
+ self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2)
+ self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space"
+
+ # Wait for the MDS to see the latest OSD map so that it will reliably
+ # be applying the policy of rejecting non-deletion metadata operations
+ # while in the full state.
+ osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
+ self.wait_until_true(
+ lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
+ timeout=10)
+
+ if not self.data_only:
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.write_n_mb("small_file_1", 0)
+
+ # Clear out some space
+ if easy_case:
+ self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
+ self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
+ else:
+ # In the hard case it is the file that filled the system.
+ # Before the new #7317 (ENOSPC, epoch barrier) changes, this
+ # would fail because the last objects written would be
+ # stuck in the client cache as objecter operations.
+ self.mount_a.run_shell(['rm', '-f', 'large_file_b'])
+ self.mount_a.run_shell(['rm', '-f', 'large_file_a'])
+
+ # Here we are waiting for two things to happen:
+ # * The MDS to purge the stray folder and execute object deletions
+ # * The OSDs to inform the mon that they are no longer full
+ self.wait_until_true(lambda: not self.is_full(),
+ timeout=osd_mon_report_interval * 120)
+
+ # Wait for the MDS to see the latest OSD map so that it will reliably
+ # be applying the free space policy
+ osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch']
+ self.wait_until_true(
+ lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch,
+ timeout=10)
+
+ # Now I should be able to write again
+ self.mount_a.write_n_mb("large_file", 50, seek=0)
+
+ # Ensure that the MDS keeps its OSD epoch barrier across a restart
+
+ def test_full_different_file(self):
+ self._test_full(True)
+
+ def test_full_same_file(self):
+ self._test_full(False)
+
+ def _remote_write_test(self, template):
+ """
+ Run some remote python in a way that's useful for
+ testing free space behaviour (see test_* methods using this)
+ """
+ file_path = os.path.join(self.mount_a.mountpoint, "full_test_file")
+
+ # Enough to trip the full flag
+ osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd'))
+ mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon"))
+
+ # Sufficient data to cause RADOS cluster to go 'full'
+ log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb))
+
+ # Long enough for RADOS cluster to notice it is full and set flag on mons
+ # (report_interval for mon to learn PG stats, tick interval for it to update OSD map,
+ # factor of 1.5 for I/O + network latency in committing OSD map and distributing it
+ # to the OSDs)
+ full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5
+
+ # Configs for this test should bring this setting down in order to
+ # run reasonably quickly
+ if osd_mon_report_interval > 10:
+ log.warning("This test may run rather slowly unless you decrease"
+ "osd_mon_report_interval (5 is a good setting)!")
+
+        # set the object_size to 1MB to make the objects distributed more evenly
+ # among the OSDs to fix Tracker#45434
+ file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576"
+ self.mount_a.run_python(template.format(
+ fill_mb=self.fill_mb,
+ file_path=file_path,
+ file_layout=file_layout,
+ full_wait=full_wait,
+ is_fuse=isinstance(self.mount_a, FuseMount)
+ ))
+
+ def test_full_fclose(self):
+ # A remote script which opens a file handle, fills up the filesystem, and then
+ # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
+ remote_script = dedent("""
+ import time
+ import datetime
+ import subprocess
+ import os
+
+ # Write some buffered data through before going full, all should be well
+ print("writing some data through which we expect to succeed")
+ bytes = 0
+ f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
+ os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
+ bytes += os.write(f, b'a' * 512 * 1024)
+ os.fsync(f)
+ print("fsync'ed data successfully, will now attempt to fill fs")
+
+ # Okay, now we're going to fill up the filesystem, and then keep
+ # writing until we see an error from fsync. As long as we're doing
+ # buffered IO, the error should always only appear from fsync and not
+ # from write
+ full = False
+
+ for n in range(0, int({fill_mb} * 0.9)):
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("wrote {{0}} bytes via buffered write, may repeat".format(bytes))
+ print("done writing {{0}} bytes".format(bytes))
+
+ # OK, now we should sneak in under the full condition
+ # due to the time it takes the OSDs to report to the
+ # mons, and get a successful fsync on our full-making data
+ os.fsync(f)
+ print("successfully fsync'ed prior to getting full state reported")
+
+ # buffered write, add more dirty data to the buffer
+ print("starting buffered write")
+ try:
+ for n in range(0, int({fill_mb} * 0.2)):
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("sleeping a bit as we've exceeded 90% of our expected full ratio")
+ time.sleep({full_wait})
+ except OSError:
+ pass;
+
+ print("wrote, now waiting 30s and then doing a close we expect to fail")
+
+ # Wait long enough for a background flush that should fail
+ time.sleep(30)
+
+ if {is_fuse}:
+ # ...and check that the failed background flush is reflected in fclose
+ try:
+ os.close(f)
+ except OSError:
+ print("close() returned an error as expected")
+ else:
+ raise RuntimeError("close() failed to raise error")
+ else:
+ # The kernel cephfs client does not raise errors on fclose
+ os.close(f)
+
+ os.unlink("{file_path}")
+ """)
+ self._remote_write_test(remote_script)
+
+ def test_full_fsync(self):
+ """
+        That when the full flag is encountered during asynchronous
+        flushes, an fwrite() succeeds but a subsequent fsync()/fclose()
+        returns the ENOSPC error.
+ """
+
+ # A remote script which opens a file handle, fills up the filesystem, and then
+ # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync
+ remote_script = dedent("""
+ import time
+ import datetime
+ import subprocess
+ import os
+
+ # Write some buffered data through before going full, all should be well
+ print("writing some data through which we expect to succeed")
+ bytes = 0
+ f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT)
+ os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}')
+ bytes += os.write(f, b'a' * 4096)
+ os.fsync(f)
+ print("fsync'ed data successfully, will now attempt to fill fs")
+
+ # Okay, now we're going to fill up the filesystem, and then keep
+ # writing until we see an error from fsync. As long as we're doing
+ # buffered IO, the error should always only appear from fsync and not
+ # from write
+ full = False
+
+ for n in range(0, int({fill_mb} * 1.1)):
+ try:
+ bytes += os.write(f, b'x' * 1024 * 1024)
+ print("wrote bytes via buffered write, moving on to fsync")
+ except OSError as e:
+ if {is_fuse}:
+ print("Unexpected error %s from write() instead of fsync()" % e)
+ raise
+ else:
+ print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
+ full = True
+ break
+
+ try:
+ os.fsync(f)
+ print("fsync'ed successfully")
+ except OSError as e:
+ print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0)))
+ full = True
+ break
+ else:
+ print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0)))
+
+ if n > {fill_mb} * 0.9:
+ # Be cautious in the last region where we expect to hit
+ # the full condition, so that we don't overshoot too dramatically
+ print("sleeping a bit as we've exceeded 90% of our expected full ratio")
+ time.sleep({full_wait})
+
+ if not full:
+ raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes)
+
+ # close() should not raise an error because we already caught it in
+ # fsync. There shouldn't have been any more writeback errors
+ # since then because all IOs got cancelled on the full flag.
+ print("calling close")
+ os.close(f)
+ print("close() did not raise error")
+
+ os.unlink("{file_path}")
+ """)
+
+ self._remote_write_test(remote_script)
+
+
+class TestQuotaFull(FullnessTestCase):
+ """
+ Test per-pool fullness, which indicates quota limits exceeded
+ """
+ pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit
+ fill_mb = pool_capacity // (1024 * 1024) # type: ignore
+
+ # We are only testing quota handling on the data pool, not the metadata
+ # pool.
+ data_only = True
+
+ def setUp(self):
+ super(TestQuotaFull, self).setUp()
+
+ pool_name = self.fs.get_data_pool_name()
+ self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name,
+ "max_bytes", "{0}".format(self.pool_capacity))
+
+
+class TestClusterFull(FullnessTestCase):
+ """
+    Test cluster-wide fullness, which indicates that an OSD has become too full
+ """
+ pool_capacity = None
+ REQUIRE_MEMSTORE = True
+
+ def setUp(self):
+ super(TestClusterFull, self).setUp()
+
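+        # Derive the fill target from the live cluster the first time this
+        # runs, then cache it on the class so later tests reuse the same value.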
+ if self.pool_capacity is None:
+ TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail']
+ TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024))
+
+# Hide the parent class so that unittest.loader doesn't try to run it.
+del globals()['FullnessTestCase']
diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py
new file mode 100644
index 000000000..67b514c22
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_migration.py
@@ -0,0 +1,100 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.workunit import task as workunit
+
+JOURNAL_FORMAT_LEGACY = 0
+JOURNAL_FORMAT_RESILIENT = 1
+
+
+class TestJournalMigration(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 2
+
+ def test_journal_migration(self):
+ old_journal_version = JOURNAL_FORMAT_LEGACY
+ new_journal_version = JOURNAL_FORMAT_RESILIENT
+
+ self.mount_a.umount_wait()
+ self.fs.mds_stop()
+
+ # Create a filesystem using the older journal format.
+ self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
+ self.fs.mds_restart()
+ self.fs.recreate()
+
+ # Enable standby replay, to cover the bug case #8811 where
+ # a standby replay might mistakenly end up trying to rewrite
+ # the journal at the same time as an active daemon.
+ self.fs.set_allow_standby_replay(True)
+
+ status = self.fs.wait_for_daemons()
+
+ self.assertTrue(self.fs.get_replay(status=status) is not None)
+
+ # Do some client work so that the log is populated with something.
+ with self.mount_a.mounted_wait():
+ self.mount_a.create_files()
+ self.mount_a.check_files() # sanity, this should always pass
+
+        # Run a more substantial workunit so that the length of the log to be
+        # converted is going to span at least a few segments
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
+ },
+ "timeout": "3h"
+ })
+
+ # Modify the ceph.conf to ask the MDS to use the new journal format.
+ self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)
+
+ # Restart the MDS.
+ self.fs.mds_fail_restart()
+
+ # This ensures that all daemons come up into a valid state
+ status = self.fs.wait_for_daemons()
+
+ # Check that files created in the initial client workload are still visible
+ # in a client mount.
+ with self.mount_a.mounted_wait():
+ self.mount_a.check_files()
+
+ # Verify that the journal really has been rewritten.
+ journal_version = self.fs.get_journal_version()
+ if journal_version != new_journal_version:
+ raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
+                new_journal_version, journal_version
+ ))
+
+ # Verify that cephfs-journal-tool can now read the rewritten journal
+ inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
+ if not inspect_out.endswith(": OK"):
+ raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
+ inspect_out
+ ))
+
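+        # Dump the journal events to JSON and count them, to confirm the
+        # rewritten journal still carries the workload's metadata updates.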
+ self.fs.journal_tool(["event", "get", "json",
+ "--path", "/tmp/journal.json"], 0)
+ p = self.fs.tool_remote.sh([
+ "python3",
+ "-c",
+ "import json; print(len(json.load(open('/tmp/journal.json'))))"
+ ])
+ event_count = int(p.strip())
+ if event_count < 1000:
+ # Approximate value of "lots", expected from having run fsstress
+ raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+
+ # Do some client work to check that writing the log is still working
+ with self.mount_a.mounted_wait():
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "3h"
+ })
+
+ # Check that both an active and a standby replay are still up
+ status = self.fs.status()
+ self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
+ self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
new file mode 100644
index 000000000..c5769784d
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -0,0 +1,405 @@
+
+"""
+Test our tools for recovering the content of damaged journals
+"""
+
+import json
+import logging
+from textwrap import dedent
+import time
+
+from teuthology.exceptions import CommandFailedError, ConnectionLostError
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+from tasks.workunit import task as workunit
+
+log = logging.getLogger(__name__)
+
+
+class TestJournalRepair(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ def test_inject_to_empty(self):
+ """
+        That when some dentries are in the journal but nothing is in
+ the backing store, we correctly populate the backing store
+ from the journalled dentries.
+ """
+
+ # Inject metadata operations
+ self.mount_a.run_shell(["touch", "rootfile"])
+ self.mount_a.run_shell(["mkdir", "subdir"])
+ self.mount_a.run_shell(["touch", "subdir/subdirfile"])
+ # There are several different paths for handling hardlinks, depending
+ # on whether an existing dentry (being overwritten) is also a hardlink
+ self.mount_a.run_shell(["mkdir", "linkdir"])
+
+ # Test inode -> remote transition for a dentry
+ self.mount_a.run_shell(["touch", "linkdir/link0"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link0"])
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"])
+
+ # Test nothing -> remote transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"])
+
+ # Test remote -> inode transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link2"])
+ self.mount_a.run_shell(["touch", "linkdir/link2"])
+
+ # Test remote -> diff remote transition
+ self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"])
+ self.mount_a.run_shell(["rm", "-f", "linkdir/link3"])
+ self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"])
+
+ # Test an empty directory
+ self.mount_a.run_shell(["mkdir", "subdir/subsubdir"])
+ self.mount_a.run_shell(["sync"])
+
+ # Before we unmount, make a note of the inode numbers, later we will
+ # check that they match what we recover from the journal
+ rootfile_ino = self.mount_a.path_to_ino("rootfile")
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+ linkdir_ino = self.mount_a.path_to_ino("linkdir")
+ subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile")
+ subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir")
+
+ self.mount_a.umount_wait()
+
+ # Stop the MDS
+ self.fs.fail()
+
+ # Now, the journal should contain the operations, but the backing
+ # store shouldn't
+ with self.assertRaises(ObjectNotFound):
+ self.fs.list_dirfrag(subdir_ino)
+ self.assertEqual(self.fs.list_dirfrag(ROOT_INO), [])
+
+ # Execute the dentry recovery, this should populate the backing store
+ self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
+
+ # Dentries in ROOT_INO are present
+ self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head']))
+ self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head'])
+ self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)),
+ sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head']))
+
+ # Now check the MDS can read what we wrote: truncate the journal
+ # and start the mds.
+ self.fs.journal_tool(['journal', 'reset'], 0)
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ # List files
+ self.mount_a.mount_wait()
+
+ # First ls -R to populate MDCache, such that hardlinks will
+ # resolve properly (recover_dentries does not create backtraces,
+ # so ordinarily hardlinks to inodes that happen not to have backtraces
+ # will be invisible in readdir).
+ # FIXME: hook in forward scrub here to regenerate backtraces
+ proc = self.mount_a.run_shell(['ls', '-R'])
+ self.mount_a.umount_wait() # remount to clear client cache before our second ls
+ self.mount_a.mount_wait()
+
+ proc = self.mount_a.run_shell(['ls', '-R'])
+ self.assertEqual(proc.stdout.getvalue().strip(),
+ dedent("""
+ .:
+ linkdir
+ rootfile
+ subdir
+
+ ./linkdir:
+ link0
+ link1
+ link2
+ link3
+
+ ./subdir:
+ subdirfile
+ subsubdir
+
+ ./subdir/subsubdir:
+ """).strip())
+
+ # Check the correct inos were preserved by path
+ self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile"))
+ self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir"))
+ self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile"))
+ self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir"))
+
+ # Check that the hard link handling came out correctly
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino)
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino)
+ self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino)
+ self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino)
+
+ # Create a new file, ensure it is not issued the same ino as one of the
+ # recovered ones
+ self.mount_a.run_shell(["touch", "afterwards"])
+ new_ino = self.mount_a.path_to_ino("afterwards")
+ self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino])
+
+ # Check that we can do metadata ops in the recovered directory
+ self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"])
+
+ @for_teuthology # 308s
+ def test_reset(self):
+ """
+ That after forcibly modifying the backing store, we can get back into
+ a good state by resetting the MDSMap.
+
+ The scenario is that we have two active MDSs, and we lose the journals. Once
+ we have completely lost confidence in the integrity of the metadata, we want to
+ return the system to a single-MDS state to go into a scrub to recover what we
+ can.
+ """
+
+ # Set max_mds to 2
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+ rank0_gid = self.fs.get_rank(rank=0, status=status)['gid']
+ self.fs.set_joinable(False) # no unintended failover
+
+ # Create a dir on each rank
+ self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file")
+ self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1")
+
+ # Ensure the pinning has taken effect and the /bravo dir is now
+ # migrated to rank 1.
+ self._wait_subtrees([('/bravo', 1), ('/alpha', 0)], rank=0, status=status)
+
+ # Do some IO (this should be split across ranks according to
+ # the rank-pinned dirs)
+ self.mount_a.create_n_files("alpha/file", 1000)
+ self.mount_a.create_n_files("bravo/file", 1000)
+
+ # Flush the journals so that we have some backing store data
+ # belonging to one MDS, and some to the other MDS.
+ self.fs.rank_asok(["flush", "journal"], rank=0)
+ self.fs.rank_asok(["flush", "journal"], rank=1)
+
+ # Stop (hard) the second MDS daemon
+ self.fs.rank_fail(rank=1)
+
+ # Wipe out the tables for MDS rank 1 so that it is broken and can't start
+ # (this is the simulated failure that we will demonstrate that the disaster
+ # recovery tools can get us back from)
+ self.fs.erase_metadata_objects(prefix="mds1_")
+
+ # Try to access files from the client
+ blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False)
+
+ # Check that this "ls -R" blocked rather than completing: indicates
+ # it got stuck trying to access subtrees which were on the now-dead MDS.
+ log.info("Sleeping to check ls is blocked...")
+ time.sleep(60)
+ self.assertFalse(blocked_ls.finished)
+
+ # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1
+ # is not coming back. Kill it.
+ log.info("Killing mount, it's blocked on the MDS we killed")
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+ try:
+ # Now that the mount is dead, the ls -R should error out.
+ blocked_ls.wait()
+ except (CommandFailedError, ConnectionLostError):
+ # The ConnectionLostError case is for kernel client, where
+ # killing the mount also means killing the node.
+ pass
+
+ # See that the second MDS will crash when it starts and tries to
+ # acquire rank 1
+ self.fs.set_joinable(True)
+
+        # The daemon taking the damaged rank should begin to start up, then
+        # drop back into standby after asking the mon to mark the rank
+        # damaged.
+ def is_marked_damaged():
+ mds_map = self.fs.get_mds_map()
+ return 1 in mds_map['damaged']
+
+ self.wait_until_true(is_marked_damaged, 60)
+ self.assertEqual(rank0_gid, self.fs.get_rank(rank=0)['gid'])
+
+ # Now give up and go through a disaster recovery procedure
+ self.fs.fail()
+ # Invoke recover_dentries quietly, because otherwise log spews millions of lines
+ self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True)
+ self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True)
+ self.fs.table_tool(["0", "reset", "session"])
+ self.fs.journal_tool(["journal", "reset"], 0)
+ self.fs.erase_mds_objects(1)
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ # Bring an MDS back online, mount a client, and see that we can walk the full
+ # filesystem tree again
+ self.fs.set_joinable(True) # redundant with `fs reset`
+ status = self.fs.wait_for_daemons()
+ self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["ls", "-R"], wait=True)
+
+ def test_table_tool(self):
+ self.mount_a.run_shell(["touch", "foo"])
+ self.fs.rank_asok(["flush", "journal"])
+
+ log.info(self.fs.table_tool(["all", "show", "inode"]))
+ log.info(self.fs.table_tool(["all", "show", "snap"]))
+ log.info(self.fs.table_tool(["all", "show", "session"]))
+
+ # Inode table should always be the same because initial state
+ # and choice of inode are deterministic.
+ # Should see one inode consumed
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {
+ "data": {
+ "version": 2,
+ "inotable": {
+ "projected_free": [
+ {"start": 1099511628777,
+ "len": 1099511626775}],
+ "free": [
+ {"start": 1099511628777,
+ "len": 1099511626775}]}},
+ "result": 0}}
+
+ )
+
+ # Should see one session
+ session_data = json.loads(self.fs.table_tool(
+ ["all", "show", "session"]))
+ self.assertEqual(len(session_data["0"]["data"]["sessions"]), 1)
+ self.assertEqual(session_data["0"]["result"], 0)
+
+ # Should see no snaps
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "snap"])),
+ {"version": 1,
+ "snapserver": {"last_snap": 1,
+ "last_created": 1,
+ "last_destroyed": 1,
+ "pending_noop": [],
+ "snaps": [],
+ "need_to_purge": {},
+ "pending_update": [],
+ "pending_destroy": []},
+ "result": 0}
+ )
+
+ # Reset everything
+ for table in ["session", "inode", "snap"]:
+ self.fs.table_tool(["all", "reset", table])
+
+ log.info(self.fs.table_tool(["all", "show", "inode"]))
+ log.info(self.fs.table_tool(["all", "show", "snap"]))
+ log.info(self.fs.table_tool(["all", "show", "session"]))
+
+ # Should see 0 sessions
+ session_data = json.loads(self.fs.table_tool(
+ ["all", "show", "session"]))
+ self.assertEqual(len(session_data["0"]["data"]["sessions"]), 0)
+ self.assertEqual(session_data["0"]["result"], 0)
+
+ # Should see entire inode range now marked free
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {"data": {"version": 1,
+ "inotable": {"projected_free": [
+ {"start": 1099511627776,
+ "len": 1099511627776}],
+ "free": [
+ {"start": 1099511627776,
+ "len": 1099511627776}]}},
+ "result": 0}}
+ )
+
+ # Should see no snaps
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "snap"])),
+ {"version": 1,
+ "snapserver": {"last_snap": 1,
+ "last_created": 1,
+ "last_destroyed": 1,
+ "pending_noop": [],
+ "snaps": [],
+ "need_to_purge": {},
+ "pending_update": [],
+ "pending_destroy": []},
+ "result": 0}
+ )
+
+ def test_table_tool_take_inos(self):
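+        # 1099511627776 == 2**40, the start (and length) of the inotable's
+        # initial free range as shown in the dumps below.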
+ initial_range_start = 1099511627776
+ initial_range_len = 1099511627776
+ # Initially a completely clear range
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "show", "inode"])),
+ {"0": {"data": {"version": 0,
+ "inotable": {"projected_free": [
+ {"start": initial_range_start,
+ "len": initial_range_len}],
+ "free": [
+ {"start": initial_range_start,
+ "len": initial_range_len}]}},
+ "result": 0}}
+ )
+
+ # Remove some
+ self.assertEqual(
+ json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])),
+ {"0": {"data": {"version": 1,
+ "inotable": {"projected_free": [
+ {"start": initial_range_start + 101,
+ "len": initial_range_len - 101}],
+ "free": [
+ {"start": initial_range_start + 101,
+ "len": initial_range_len - 101}]}},
+ "result": 0}}
+ )
+
+ @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth
+ def test_journal_smoke(self):
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "1h"
+ })
+
+ for mount in self.mounts:
+ mount.umount_wait()
+
+ self.fs.fail()
+
+ # journal tool smoke
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "suites/cephfs_journal_tool_smoke.sh"],
+ },
+ "timeout": "1h"
+ })
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+
+        # trivial sync on mount a
+ workunit(self.ctx, {
+ 'clients': {
+ "client.{0}".format(self.mount_a.client_id): [
+ "fs/misc/trivial_sync.sh"],
+ },
+ "timeout": "1h"
+ })
+
diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py
new file mode 100644
index 000000000..746c2ffe3
--- /dev/null
+++ b/qa/tasks/cephfs/test_mantle.py
@@ -0,0 +1,111 @@
+from io import StringIO
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import json
+import logging
+
+log = logging.getLogger(__name__)
+failure = "using old balancer; mantle failed for balancer="
+success = "mantle balancer version changed: "
+
+class TestMantle(CephFSTestCase):
+ def start_mantle(self):
+ self.wait_for_health_clear(timeout=30)
+ self.fs.set_max_mds(2)
+ self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ for m in self.fs.get_active_names():
+ self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m)
+ self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m)
+
+ def push_balancer(self, obj, lua_code, expect):
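+ # Point the filesystem at the named balancer object, upload the Lua code
+ # via rados, and expect the cluster log to report the mantle failure
+ # message for that object.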
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj)
+ self.fs.radosm(["put", obj, "-"], stdin=StringIO(lua_code))
+ with self.assert_cluster_log(failure + obj + " " + expect):
+ log.info("run a " + obj + " balancer that expects=" + expect)
+
+ def test_version_empty(self):
+ self.start_mantle()
+ expect = " : (2) No such file or directory"
+
+ ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer')
+ assert(ret == 22) # EINVAL
+
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ")
+ with self.assert_cluster_log(failure + " " + expect): pass
+
+ def test_version_not_in_rados(self):
+ self.start_mantle()
+ expect = failure + "ghost.lua : (2) No such file or directory"
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua")
+ with self.assert_cluster_log(expect): pass
+
+ def test_balancer_invalid(self):
+ self.start_mantle()
+ expect = ": (22) Invalid argument"
+
+ lua_code = "this is invalid lua code!"
+ self.push_balancer("invalid.lua", lua_code, expect)
+
+ lua_code = "BAL_LOG()"
+ self.push_balancer("invalid_log.lua", lua_code, expect)
+
+ lua_code = "BAL_LOG(0)"
+ self.push_balancer("invalid_log_again.lua", lua_code, expect)
+
+ def test_balancer_valid(self):
+ self.start_mantle()
+ lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}"
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+ self.fs.radosm(["put", "valid.lua", "-"], stdin=StringIO(lua_code))
+ with self.assert_cluster_log(success + "valid.lua"):
+ log.info("run a valid.lua balancer")
+
+ def test_return_invalid(self):
+ self.start_mantle()
+ expect = ": (22) Invalid argument"
+
+ lua_code = "return \"hello\""
+ self.push_balancer("string.lua", lua_code, expect)
+
+ lua_code = "return 3"
+ self.push_balancer("number.lua", lua_code, expect)
+
+ lua_code = "return {}"
+ self.push_balancer("dict_empty.lua", lua_code, expect)
+
+ lua_code = "return {\"this\", \"is\", \"a\", \"test\"}"
+ self.push_balancer("dict_of_strings.lua", lua_code, expect)
+
+ lua_code = "return {3, \"test\"}"
+ self.push_balancer("dict_of_mixed.lua", lua_code, expect)
+
+ lua_code = "return {3}"
+ self.push_balancer("not_enough_numbers.lua", lua_code, expect)
+
+ lua_code = "return {3, 4, 5, 6, 7, 8, 9}"
+ self.push_balancer("too_many_numbers.lua", lua_code, expect)
+
+ def test_dead_osd(self):
+ self.start_mantle()
+ expect = " : (110) Connection timed out"
+
+ # kill the OSDs so that the balancer pull from RADOS times out
+ osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
+ for i in range(0, len(osd_map['osds'])):
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i))
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i))
+
+ # trigger a pull from RADOS
+ self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+
+ # make the timeout a little longer since dead OSDs spam ceph -w
+ with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30):
+ log.info("run a balancer that should timeout")
+
+ # cleanup
+ for i in range(0, len(osd_map['osds'])):
+ self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i))
diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py
new file mode 100644
index 000000000..ad877f622
--- /dev/null
+++ b/qa/tasks/cephfs/test_mds_metrics.py
@@ -0,0 +1,643 @@
+import os
+import json
+import time
+import random
+import logging
+import errno
+
+from teuthology.contextutil import safe_while, MaxWhileTries
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestMDSMetrics(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 3
+
+ TEST_DIR_PERFIX = "test_mds_metrics"
+
+ def setUp(self):
+ super(TestMDSMetrics, self).setUp()
+ self._start_with_single_active_mds()
+ self._enable_mgr_stats_plugin()
+
+ def tearDown(self):
+ self._disable_mgr_stats_plugin()
+ super(TestMDSMetrics, self).tearDown()
+
+ def _start_with_single_active_mds(self):
+ curr_max_mds = self.fs.get_var('max_mds')
+ if curr_max_mds > 1:
+ self.fs.shrink(1)
+
+ def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[], mul_fs=[]):
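+ # Returns a callback for _get_metrics() that accepts the `fs perf stats`
+ # output only when there is an entry for every active MDS rank (plus the
+ # delayed_ranks list), no ranks are delayed, and each filesystem reports
+ # at least `client_count` entries in global_metrics and client_metadata.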
+ def verify_metrics_cbk(metrics):
+ mds_metrics = metrics['metrics']
+ if not len(mds_metrics) == active_mds_count + 1: # one entry per active MDS + the delayed_ranks entry
+ return False
+ fs_status = self.fs.status()
+ nonlocal ranks, mul_fs
+ if not ranks:
+ if not mul_fs:
+ mul_fs = [self.fs.id]
+ for filesystem in mul_fs:
+ ranks = set([info['rank'] for info in fs_status.get_ranks(filesystem)])
+ for rank in ranks:
+ r = mds_metrics.get("mds.{}".format(rank), None)
+ if not r or not len(mds_metrics['delayed_ranks']) == 0:
+ return False
+ for item in mul_fs:
+ key = fs_status.get_fsmap(item)['mdsmap']['fs_name']
+ global_metrics = metrics['global_metrics'].get(key, {})
+ client_metadata = metrics['client_metadata'].get(key, {})
+ if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count:
+ return False
+ return True
+ return verify_metrics_cbk
+
+ def _fs_perf_stats(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", *args)
+
+ def _enable_mgr_stats_plugin(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats")
+
+ def _disable_mgr_stats_plugin(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats")
+
+ def _spread_directory_on_all_ranks(self, fscid):
+ fs_status = self.fs.status()
+ ranks = set([info['rank'] for info in fs_status.get_ranks(fscid)])
+ # create a per-rank pinned directory
+ for rank in ranks:
+ dirname = "{0}_{1}".format(TestMDSMetrics.TEST_DIR_PERFIX, rank)
+ self.mount_a.run_shell(["mkdir", dirname])
+ self.mount_a.setfattr(dirname, "ceph.dir.pin", str(rank))
+ log.info("pinning directory {0} to rank {1}".format(dirname, rank))
+ for i in range(16):
+ filename = "{0}.{1}".format("test", i)
+ self.mount_a.write_n_mb(os.path.join(dirname, filename), 1)
+
+ def _do_spread_io(self, fscid):
+ # spread readdir I/O
+ self.mount_b.run_shell(["find", "."])
+
+ def _do_spread_io_all_clients(self, fscid):
+ # spread readdir I/O
+ self.mount_a.run_shell(["find", "."])
+ self.mount_b.run_shell(["find", "."])
+
+ def _cleanup_test_dirs(self):
+ dirnames = self.mount_a.run_shell(["ls"]).stdout.getvalue()
+ for dirname in dirnames.split("\n"):
+ if dirname.startswith(TestMDSMetrics.TEST_DIR_PERFIX):
+ log.info("cleaning directory {}".format(dirname))
+ self.mount_a.run_shell(["rm", "-rf", dirname])
+
+ def _get_metrics(self, verifier_callback, trials, *args):
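+ # Poll `ceph fs perf stats <args>` once a second, up to `trials` times,
+ # until verifier_callback accepts the parsed JSON; returns (done, metrics).
+ # safe_while raises MaxWhileTries if the verifier never succeeds.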
+ metrics = None
+ done = False
+ with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed:
+ while proceed():
+ metrics = json.loads(self._fs_perf_stats(*args))
+ done = verifier_callback(metrics)
+ if done:
+ break
+ return done, metrics
+
+ def _setup_fs(self, fs_name):
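+ # Create a new filesystem, restart the MDS daemons, wait for the
+ # filesystem to become healthy and reconfigure client auth caps so the
+ # clients can access its data pool.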
+ fs_a = self.mds_cluster.newfs(name=fs_name)
+
+ self.mds_cluster.mds_restart()
+
+ # Wait for filesystem to go healthy
+ fs_a.wait_for_daemons()
+
+ # Reconfigure client auth caps
+ for mount in self.mounts:
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', f"client.{mount.client_id}",
+ 'mds', 'allow',
+ 'mon', 'allow r',
+ 'osd', f'allow rw pool={fs_a.get_data_pool_name()}')
+
+ return fs_a
+
+ # basic check to verify if we get back metrics from each active mds rank
+
+ def test_metrics_from_rank(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ def test_metrics_post_client_disconnection(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ self.mount_a.umount_wait()
+
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED - 1), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ def test_metrics_mds_grow(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # grow the mds cluster
+ self.fs.grow(2)
+
+ fscid = self.fs.id
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(self.verify_mds_metrics(
+ active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED) , 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # cleanup test directories
+ self._cleanup_test_dirs()
+
+ def test_metrics_mds_grow_and_shrink(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # grow the mds cluster
+ self.fs.grow(2)
+
+ fscid = self.fs.id
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # shrink mds cluster
+ self.fs.shrink(1)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # cleanup test directories
+ self._cleanup_test_dirs()
+
+ def test_delayed_metrics(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # grow the mds cluster
+ self.fs.grow(2)
+
+ fscid = self.fs.id
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # block messaging between rank 0 and rank 1 so that rank 1's metrics are delayed
+ delayed_rank = 1
+ mds_id_rank0 = self.fs.get_rank(rank=0)['name']
+ mds_id_rank1 = self.fs.get_rank(rank=1)['name']
+
+ self.fs.set_inter_mds_block(True, mds_id_rank0, mds_id_rank1)
+
+ def verify_delayed_metrics(metrics):
+ mds_metrics = metrics['metrics']
+ r = mds_metrics.get("mds.{}".format(delayed_rank), None)
+ if not r or not delayed_rank in mds_metrics['delayed_ranks']:
+ return False
+ return True
+ # validate
+ valid, metrics = self._get_metrics(verify_delayed_metrics, 30)
+ log.debug("metrics={0}".format(metrics))
+
+ self.assertTrue(valid)
+ self.fs.set_inter_mds_block(False, mds_id_rank0, mds_id_rank1)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # cleanup test directories
+ self._cleanup_test_dirs()
+
+ def test_query_mds_filter(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # grow the mds cluster
+ self.fs.grow(2)
+
+ fscid = self.fs.id
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ filtered_mds = 1
+ def verify_filtered_mds_rank_metrics(metrics):
+ # checks that the metrics contain only the client_metadata and
+ # global_metrics filtered by --mds_rank=1
+ global_metrics = metrics['global_metrics'].get(self.fs.name, {})
+ client_metadata = metrics['client_metadata'].get(self.fs.name, {})
+ mds_metrics = metrics['metrics']
+ if len(mds_metrics) != 2 or f"mds.{filtered_mds}" not in mds_metrics:
+ return False
+ if len(global_metrics) > TestMDSMetrics.CLIENTS_REQUIRED or\
+ len(client_metadata) > TestMDSMetrics.CLIENTS_REQUIRED:
+ return False
+ if len(set(global_metrics) - set(mds_metrics[f"mds.{filtered_mds}"])) or\
+ len(set(client_metadata) - set(mds_metrics[f"mds.{filtered_mds}"])):
+ return False
+ return True
+ # initiate a new query with `--mds_rank` filter and validate if
+ # we get metrics *only* from that mds.
+ valid, metrics = self._get_metrics(verify_filtered_mds_rank_metrics, 30,
+ f'--mds_rank={filtered_mds}')
+ log.debug(f"metrics={metrics}")
+ self.assertTrue(valid, "Incorrect 'ceph fs perf stats' output"
+ f" with filter '--mds_rank={filtered_mds}'")
+
+ def test_query_client_filter(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ mds_metrics = metrics['metrics']
+ # pick a random client
+ client = random.choice(list(mds_metrics['mds.0'].keys()))
+ # could have used regex to extract client id
+ client_id = (client.split(' ')[0]).split('.')[-1]
+
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=1), 30, '--client_id={}'.format(client_id))
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ def test_query_client_ip_filter(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ client_matadata = metrics['client_metadata'][self.fs.name]
+ # pick a random client
+ client = random.choice(list(client_matadata.keys()))
+ # get IP of client to use in filter
+ client_ip = client_matadata[client]['IP']
+
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=1), 30, '--client_ip={}'.format(client_ip))
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # verify IP from output with filter IP
+ for i in metrics['client_metadata'][self.fs.name]:
+ self.assertEqual(client_ip, metrics['client_metadata'][self.fs.name][i]['IP'])
+
+ def test_query_mds_and_client_filter(self):
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ # grow the mds cluster
+ self.fs.grow(2)
+
+ fscid = self.fs.id
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io_all_clients(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ mds_metrics = metrics['metrics']
+
+ # pick a random client
+ client = random.choice(list(mds_metrics['mds.1'].keys()))
+ # could have used regex to extract client id
+ client_id = (client.split(' ')[0]).split('.')[-1]
+ filtered_mds = 1
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=1, ranks=[filtered_mds]),
+ 30, '--mds_rank={}'.format(filtered_mds), '--client_id={}'.format(client_id))
+ log.debug("metrics={0}".format(metrics))
+ self.assertTrue(valid)
+
+ def test_for_invalid_mds_rank(self):
+ invalid_mds_rank = "1,"
+ # try the 'fs perf stats' command with an invalid mds_rank
+ try:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs perf stat' command to fail for invalid mds_rank")
+
+ def test_for_invalid_client_id(self):
+ invalid_client_id = "abcd"
+ # try the 'fs perf stats' command with an invalid client_id
+ try:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_id", invalid_client_id)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_id")
+
+ def test_for_invalid_client_ip(self):
+ invalid_client_ip = "1.2.3"
+ # try the 'fs perf stats' command with an invalid client_ip
+ try:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip")
+
+ def test_perf_stats_stale_metrics(self):
+ """
+ That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover
+ """
+ # validate
+ valid, metrics = self._get_metrics(self.verify_mds_metrics(
+ active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug(f'metrics={metrics}')
+ self.assertTrue(valid)
+
+ # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their
+ # entries from the global_metrics.
+ client_a_name = f'client.{self.mount_a.get_global_id()}'
+ client_b_name = f'client.{self.mount_b.get_global_id()}'
+
+ global_metrics = metrics['global_metrics']
+ client_a_metrics = global_metrics[self.fs.name][client_a_name]
+ client_b_metrics = global_metrics[self.fs.name][client_b_name]
+
+ # fail rank0 mds
+ self.fs.rank_fail(rank=0)
+
+ # Wait for rank0 up:active state
+ self.fs.wait_for_state('up:active', rank=0, timeout=30)
+
+ fscid = self.fs.id
+
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fscid)
+
+ # spread some I/O
+ self._do_spread_io_all_clients(fscid)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ try:
+ valid, metrics_new = self._get_metrics(self.verify_mds_metrics(
+ active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30)
+ log.debug(f'metrics={metrics_new}')
+ self.assertTrue(valid)
+
+ client_metadata = metrics_new['client_metadata']
+ client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {})
+ client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {})
+
+ global_metrics = metrics_new['global_metrics']
+ client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {})
+ client_b_metrics_new = global_metrics.get(self.fs.name, {}).get(client_b_name, {})
+
+ # the metrics should be different for the test to succeed.
+ self.assertTrue(client_a_metadata and client_b_metadata and
+ client_a_metrics_new and client_b_metrics_new and
+ (client_a_metrics_new != client_a_metrics) and
+ (client_b_metrics_new != client_b_metrics),
+ "Invalid 'ceph fs perf stats' metrics after rank0 mds failover")
+ except MaxWhileTries:
+ raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics")
+ finally:
+ # cleanup test directories
+ self._cleanup_test_dirs()
+
+ def test_client_metrics_and_metadata(self):
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+ self.fs.delete_all_filesystems()
+
+ self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
+ "enable_multiple", "true", "--yes-i-really-mean-it")
+
+ # creating filesystem
+ fs_a = self._setup_fs(fs_name="fs1")
+
+ # Mount a client on fs_a
+ self.mount_a.mount_wait(cephfs_name=fs_a.name)
+ self.mount_a.write_n_mb("pad.bin", 1)
+ self.mount_a.write_n_mb("test.bin", 2)
+ self.mount_a.path_to_ino("test.bin")
+ self.mount_a.create_files()
+
+ # creating another filesystem
+ fs_b = self._setup_fs(fs_name="fs2")
+
+ # Mount a client on fs_b
+ self.mount_b.mount_wait(cephfs_name=fs_b.name)
+ self.mount_b.write_n_mb("test.bin", 1)
+ self.mount_b.path_to_ino("test.bin")
+ self.mount_b.create_files()
+
+ fscid_list = [fs_a.id, fs_b.id]
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30)
+ log.debug(f"metrics={metrics}")
+ self.assertTrue(valid)
+
+ client_metadata_a = metrics['client_metadata']['fs1']
+ client_metadata_b = metrics['client_metadata']['fs2']
+
+ for i in client_metadata_a:
+ if not (client_metadata_a[i]['hostname']):
+ raise RuntimeError("hostname of fs1 not found!")
+ if not (client_metadata_a[i]['valid_metrics']):
+ raise RuntimeError("valid_metrics of fs1 not found!")
+
+ for i in client_metadata_b:
+ if not (client_metadata_b[i]['hostname']):
+ raise RuntimeError("hostname of fs2 not found!")
+ if not (client_metadata_b[i]['valid_metrics']):
+ raise RuntimeError("valid_metrics of fs2 not found!")
+
+ def test_non_existing_mds_rank(self):
+ def verify_filtered_metrics(metrics):
+ # checks whether the metrics have non-empty client_metadata and global_metrics
+ if metrics['client_metadata'].get(self.fs.name, {})\
+ or metrics['global_metrics'].get(self.fs.name, {}):
+ return True
+ return False
+
+ try:
+ # validate
+ filter_rank = random.randint(1, 10)
+ valid, metrics = self._get_metrics(verify_filtered_metrics, 30,
+ '--mds_rank={}'.format(filter_rank))
+ log.info(f'metrics={metrics}')
+ self.assertFalse(valid, "Fetched 'ceph fs perf stats' metrics using nonexistent MDS rank")
+ except MaxWhileTries:
+ # success
+ pass
+
+ def test_perf_stats_stale_metrics_with_multiple_filesystem(self):
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set",
+ "enable_multiple", "true", "--yes-i-really-mean-it")
+
+ # creating filesystem
+ fs_b = self._setup_fs(fs_name="fs2")
+
+ # Mount a client on fs_b
+ self.mount_b.mount_wait(cephfs_name=fs_b.name)
+ self.mount_b.write_n_mb("test.bin", 1)
+ self.mount_b.path_to_ino("test.bin")
+ self.mount_b.create_files()
+
+ # creating another filesystem
+ fs_a = self._setup_fs(fs_name="fs1")
+
+ # Mount a client on fs_a
+ self.mount_a.mount_wait(cephfs_name=fs_a.name)
+ self.mount_a.write_n_mb("pad.bin", 1)
+ self.mount_a.write_n_mb("test.bin", 2)
+ self.mount_a.path_to_ino("test.bin")
+ self.mount_a.create_files()
+
+ # validate
+ valid, metrics = self._get_metrics(
+ self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30)
+ log.debug(f"metrics={metrics}")
+ self.assertTrue(valid)
+
+ # get mounted client's entries from the global_metrics.
+ client_a_name = f'client.{self.mount_a.get_global_id()}'
+
+ global_metrics = metrics['global_metrics']
+ client_a_metrics = global_metrics.get("fs1", {}).get(client_a_name, {})
+
+ # fail active mds of fs_a
+ fs_a_mds = fs_a.get_active_names()[0]
+ self.mds_cluster.mds_fail(fs_a_mds)
+ fs_a.wait_for_state('up:active', rank=0, timeout=30)
+
+ # spread directory per rank
+ self._spread_directory_on_all_ranks(fs_a.id)
+
+ # spread some I/O
+ self._do_spread_io_all_clients(fs_a.id)
+
+ # wait a bit for mgr to get updated metrics
+ time.sleep(5)
+
+ # validate
+ try:
+ valid, metrics_new = self._get_metrics(
+ self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30)
+ log.debug(f'metrics={metrics_new}')
+ self.assertTrue(valid)
+
+ client_metadata = metrics_new['client_metadata']
+ client_a_metadata = client_metadata.get("fs1", {}).get(client_a_name, {})
+
+ global_metrics = metrics_new['global_metrics']
+ client_a_metrics_new = global_metrics.get("fs1", {}).get(client_a_name, {})
+
+ # the metrics should be different for the test to succeed.
+ self.assertTrue(client_a_metadata and client_a_metrics_new
+ and (client_a_metrics_new != client_a_metrics),
+ "Invalid 'ceph fs perf stats' metrics after"
+ f" rank0 mds of {fs_a.name} failover")
+ except MaxWhileTries:
+ raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics")
+ finally:
+ # cleanup test directories
+ self._cleanup_test_dirs()
+
diff --git a/qa/tasks/cephfs/test_meta_injection.py b/qa/tasks/cephfs/test_meta_injection.py
new file mode 100644
index 000000000..916b30a25
--- /dev/null
+++ b/qa/tasks/cephfs/test_meta_injection.py
@@ -0,0 +1,38 @@
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+class TestMetaInjection(CephFSTestCase):
+ def test_meta_injection(self):
+ conf_ori = self.fs.mds_asok(['config', 'show'])
+ self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', '1'])
+ self.mount_a.run_shell(["mkdir", "metadir"])
+ self.mount_a.run_shell(["touch", "metadir/metafile1"])
+ self.mount_a.run_shell(["touch", "metadir/metafile2"])
+ self.fs.mds_asok(['flush', 'journal'])
+ dirino = self.mount_a.path_to_ino("metadir")
+ ino = self.mount_a.path_to_ino("metadir/metafile1")
+
+ # export meta of ino
+ self.fs.meta_tool(['showm', '-i', str(ino), '-o', '/tmp/meta_out'], 0, True)
+ out = self.mount_a.run_shell(['grep', str(ino),'/tmp/meta_out']).stdout.getvalue().strip()
+
+ # check the metadata of ino
+ self.assertNotEqual(out.find(u'"ino":'+ str(ino)), -1)
+
+ # amend info of ino
+ self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj")
+ self.fs.meta_tool(['amend', '-i', str(ino), '--in', '/tmp/meta_out', '--yes-i-really-really-mean-it'], 0, True)
+ self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj_chg")
+
+ # verify the object is unchanged after importing meta_out back
+ ori_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj"]).stdout.getvalue().strip().split()
+ chg_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj_chg"]).stdout.getvalue().strip().split()
+ print(ori_mds5," ==> ", chg_mds5)
+ self.assertEqual(len(ori_mds5), 2)
+ self.assertEqual(len(chg_mds5), 2)
+ self.assertEqual(ori_mds5[0], chg_mds5[0])
+
+ self.mount_a.run_shell(["rm", "metadir", "-rf"])
+ self.mount_a.run_shell(["rm", "/tmp/meta_obj"])
+ self.mount_a.run_shell(["rm", "/tmp/meta_obj_chg"])
+ # restore config of mds_log_max_segments
+ self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', conf_ori["mds_log_max_segments"]])
diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py
new file mode 100644
index 000000000..c1a940e3f
--- /dev/null
+++ b/qa/tasks/cephfs/test_mirroring.py
@@ -0,0 +1,1298 @@
+import os
+import json
+import errno
+import logging
+import random
+import time
+
+from io import StringIO
+from collections import deque
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while
+
+log = logging.getLogger(__name__)
+
+class TestMirroring(CephFSTestCase):
+ MDSS_REQUIRED = 5
+ CLIENTS_REQUIRED = 2
+ REQUIRE_BACKUP_FILESYSTEM = True
+
+ MODULE_NAME = "mirroring"
+
+ def setUp(self):
+ super(TestMirroring, self).setUp()
+ self.primary_fs_name = self.fs.name
+ self.primary_fs_id = self.fs.id
+ self.secondary_fs_name = self.backup_fs.name
+ self.secondary_fs_id = self.backup_fs.id
+ self.enable_mirroring_module()
+
+ def tearDown(self):
+ self.disable_mirroring_module()
+ super(TestMirroring, self).tearDown()
+
+ def enable_mirroring_module(self):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", TestMirroring.MODULE_NAME)
+
+ def disable_mirroring_module(self):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME)
+
+ def enable_mirroring(self, fs_name, fs_id):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "enable", fs_name)
+ time.sleep(10)
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ self.assertTrue(res['peers'] == {})
+ self.assertTrue(res['snap_dirs']['dir_count'] == 0)
+
+ def disable_mirroring(self, fs_name, fs_id):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "disable", fs_name)
+ time.sleep(10)
+ # verify via asok
+ try:
+ self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError('expected admin socket to be unavailable')
+
+ def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None):
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ self.assertTrue(peer_uuid in res['peers'])
+ client_name = res['peers'][peer_uuid]['remote']['client_name']
+ cluster_name = res['peers'][peer_uuid]['remote']['cluster_name']
+ self.assertTrue(peer_spec == f'{client_name}@{cluster_name}')
+ if remote_fs_name:
+ self.assertTrue(self.secondary_fs_name == res['peers'][peer_uuid]['remote']['fs_name'])
+ else:
+ self.assertTrue(self.primary_fs_name == res['peers'][peer_uuid]['remote']['fs_name'])
+
+ def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None):
+ if remote_fs_name:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name)
+ else:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec)
+ time.sleep(10)
+ self.verify_peer_added(fs_name, fs_id, peer_spec, remote_fs_name)
+
+ def peer_remove(self, fs_name, fs_id, peer_spec):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid)
+ time.sleep(10)
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ self.assertTrue(res['peers'] == {} and res['snap_dirs']['dir_count'] == 0)
+
+ def bootstrap_peer(self, fs_name, client_name, site_name):
+ outj = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd(
+ "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, client_name, site_name))
+ return outj['token']
+
+ def import_peer(self, fs_name, token):
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_bootstrap", "import",
+ fs_name, token)
+
+ def add_directory(self, fs_name, fs_id, dir_name):
+ # get initial dir count
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ dir_count = res['snap_dirs']['dir_count']
+ log.debug(f'initial dir_count={dir_count}')
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name)
+
+ time.sleep(10)
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ new_dir_count = res['snap_dirs']['dir_count']
+ log.debug(f'new dir_count={new_dir_count}')
+ self.assertTrue(new_dir_count > dir_count)
+
+ def remove_directory(self, fs_name, fs_id, dir_name):
+ # get initial dir count
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ dir_count = res['snap_dirs']['dir_count']
+ log.debug(f'initial dir_count={dir_count}')
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", fs_name, dir_name)
+
+ time.sleep(10)
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ new_dir_count = res['snap_dirs']['dir_count']
+ log.debug(f'new dir_count={new_dir_count}')
+ self.assertTrue(new_dir_count < dir_count)
+
+ def check_peer_status(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name,
+ expected_snap_count):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+ 'fs', 'mirror', 'peer', 'status',
+ f'{fs_name}@{fs_id}', peer_uuid)
+ self.assertTrue(dir_name in res)
+ self.assertTrue(res[dir_name]['last_synced_snap']['name'] == expected_snap_name)
+ self.assertTrue(res[dir_name]['snaps_synced'] == expected_snap_count)
+
+ def check_peer_status_deleted_snap(self, fs_name, fs_id, peer_spec, dir_name,
+ expected_delete_count):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+ 'fs', 'mirror', 'peer', 'status',
+ f'{fs_name}@{fs_id}', peer_uuid)
+ self.assertTrue(dir_name in res)
+ self.assertTrue(res[dir_name]['snaps_deleted'] == expected_delete_count)
+
+ def check_peer_status_renamed_snap(self, fs_name, fs_id, peer_spec, dir_name,
+ expected_rename_count):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+ 'fs', 'mirror', 'peer', 'status',
+ f'{fs_name}@{fs_id}', peer_uuid)
+ self.assertTrue(dir_name in res)
+ self.assertTrue(res[dir_name]['snaps_renamed'] == expected_rename_count)
+
+ def check_peer_snap_in_progress(self, fs_name, fs_id,
+ peer_spec, dir_name, snap_name):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+ 'fs', 'mirror', 'peer', 'status',
+ f'{fs_name}@{fs_id}', peer_uuid)
+ self.assertTrue('syncing' == res[dir_name]['state'])
+ self.assertTrue(res[dir_name]['current_sycning_snap']['name'] == snap_name)
+
+ def verify_snapshot(self, dir_name, snap_name):
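+ # Confirm the snapshot shows up under .snap on the secondary mount and
+ # that recursive checksums of the snapshot match between the primary
+ # (mount_a) and secondary (mount_b) mounts.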
+ snap_list = self.mount_b.ls(path=f'{dir_name}/.snap')
+ self.assertTrue(snap_name in snap_list)
+
+ source_res = self.mount_a.dir_checksum(path=f'{dir_name}/.snap/{snap_name}',
+ follow_symlinks=True)
+ log.debug(f'source snapshot checksum {snap_name} {source_res}')
+
+ dest_res = self.mount_b.dir_checksum(path=f'{dir_name}/.snap/{snap_name}',
+ follow_symlinks=True)
+ log.debug(f'destination snapshot checksum {snap_name} {dest_res}')
+ self.assertTrue(source_res == dest_res)
+
+ def verify_failed_directory(self, fs_name, fs_id, peer_spec, dir_name):
+ peer_uuid = self.get_peer_uuid(peer_spec)
+ res = self.mirror_daemon_command(f'peer status for fs: {fs_name}',
+ 'fs', 'mirror', 'peer', 'status',
+ f'{fs_name}@{fs_id}', peer_uuid)
+ self.assertTrue('failed' == res[dir_name]['state'])
+
+ def get_peer_uuid(self, peer_spec):
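+ # Look up the peer UUID in the FSMap's mirror_info by matching the
+ # "client@cluster" peer spec; returns None if no peer matches.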
+ status = self.fs.status()
+ fs_map = status.get_fsmap_byname(self.primary_fs_name)
+ peers = fs_map['mirror_info']['peers']
+ for peer_uuid, mirror_info in peers.items():
+ client_name = mirror_info['remote']['client_name']
+ cluster_name = mirror_info['remote']['cluster_name']
+ remote_peer_spec = f'{client_name}@{cluster_name}'
+ if peer_spec == remote_peer_spec:
+ return peer_uuid
+ return None
+
+ def get_daemon_admin_socket(self):
+ """overloaded by teuthology override (fs/mirror/clients/mirror.yaml)"""
+ return "/var/run/ceph/cephfs-mirror.asok"
+
+ def get_mirror_daemon_pid(self):
+ """pid file overloaded in fs/mirror/clients/mirror.yaml"""
+ return self.mount_a.run_shell(['cat', '/var/run/ceph/cephfs-mirror.pid']).stdout.getvalue().strip()
+
+ def get_mirror_rados_addr(self, fs_name, fs_id):
+ """return the rados addr used by cephfs-mirror instance"""
+ res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}',
+ 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}')
+ return res['rados_inst']
+
+ def mirror_daemon_command(self, cmd_label, *args):
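+ # Run `ceph --admin-daemon <asok> <args>` on mount_a's node against the
+ # cephfs-mirror admin socket and return the parsed JSON output.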
+ asok_path = self.get_daemon_admin_socket()
+ try:
+ # use mount_a's remote to execute command
+ p = self.mount_a.client_remote.run(args=
+ ['ceph', '--admin-daemon', asok_path] + list(args),
+ stdout=StringIO(), stderr=StringIO(), timeout=30,
+ check_status=True, label=cmd_label)
+ p.wait()
+ except CommandFailedError as ce:
+ log.warn(f'mirror daemon command with label "{cmd_label}" failed: {ce}')
+ raise
+ res = p.stdout.getvalue().strip()
+ log.debug(f'command returned={res}')
+ return json.loads(res)
+
+ def get_mirror_daemon_status(self):
+ daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status"))
+ log.debug(f'daemon_status: {daemon_status}')
+ # only a single mirror daemon is supported, so examine the first entry
+ status = daemon_status[0]
+ log.debug(f'status: {status}')
+ return status
+
+ def test_basic_mirror_commands(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_mirror_peer_commands(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # add peer
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+ # remove peer
+ self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph")
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_mirror_disable_with_peer(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # add peer
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_matching_peer(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ try:
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph")
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError('invalid errno when adding a matching remote peer')
+ else:
+ raise RuntimeError('adding a peer matching local spec should fail')
+
+ # verify via asok -- nothing should get added
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ self.assertTrue(res['peers'] == {})
+
+ # and explicitly specifying the spec (via filesystem name) should fail too
+ try:
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError('invalid errno when adding a matching remote peer')
+ else:
+ raise RuntimeError('adding a peer matching local spec should fail')
+
+ # verify via asok -- nothing should get added
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ self.assertTrue(res['peers'] == {})
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_mirror_peer_add_existing(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # add peer
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # adding the same peer should be idempotent
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # remove peer
+ self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph")
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_peer_commands_with_mirroring_disabled(self):
+ # try adding peer when mirroring is not enabled
+ try:
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected peer_add to fail')
+
+ # try removing peer
+ try:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid')
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected peer_remove to fail')
+
+ def test_add_directory_with_mirroring_disabled(self):
+ # try adding a directory when mirroring is not enabled
+ try:
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1")
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory add to fail')
+
+ def test_directory_commands(self):
+ self.mount_a.run_shell(["mkdir", "d1"])
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ try:
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory add to fail')
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ try:
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ except CommandFailedError as ce:
+ if ce.exitstatus not in (errno.ENOENT, errno.EINVAL):
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-deleting a directory')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory removal to fail')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.mount_a.run_shell(["rmdir", "d1"])
+
+ def test_add_relative_directory_path(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ try:
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1')
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a relative path dir')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory add to fail')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_add_directory_path_normalization(self):
+ self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"])
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3')
+ def check_add_command_failure(dir_path):
+ try:
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory add to fail')
+
+ # every variant below normalizes to /d1/d2/d3
+ check_add_command_failure('/d1/d2/././././././d3')
+ check_add_command_failure('/d1/d2/././././././d3//////')
+ check_add_command_failure('/d1/d2/../d2/././././d3')
+ check_add_command_failure('/././././d1/./././d2/./././d3//////')
+ check_add_command_failure('/./d1/./d2/./d3/../../../d1/d2/d3')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.mount_a.run_shell(["rm", "-rf", "d1"])
+
+ def test_add_ancestor_and_child_directory(self):
+ self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"])
+ self.mount_a.run_shell(["mkdir", "-p", "d1/d4"])
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/')
+ def check_add_command_failure(dir_path):
+ try:
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory')
+ else:
+ raise RuntimeError(-errno.EINVAL, 'expected directory add to fail')
+
+ # cannot add ancestors or a subtree for an existing directory
+ check_add_command_failure('/')
+ check_add_command_failure('/d1')
+ check_add_command_failure('/d1/d2/d3')
+
+ # obviously, one can add a non-ancestor or non-subtree
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d4/')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.mount_a.run_shell(["rm", "-rf", "d1"])
+
+ def test_cephfs_mirror_blocklist(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # add peer
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ peers_1 = set(res['peers'])
+
+ # fetch rados address for blocklist check
+ rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+
+ # simulate non-responding mirror daemon by sending SIGSTOP
+ pid = self.get_mirror_daemon_pid()
+ log.debug(f'SIGSTOP to cephfs-mirror pid {pid}')
+ self.mount_a.run_shell(['kill', '-SIGSTOP', pid])
+
+ # wait for blocklist timeout -- the manager module would blocklist
+ # the mirror daemon
+ time.sleep(40)
+
+ # wake up the mirror daemon -- at this point, the daemon should know
+ # that it has been blocklisted
+ log.debug('SIGCONT to cephfs-mirror')
+ self.mount_a.run_shell(['kill', '-SIGCONT', pid])
+
+ # check if the rados addr is blocklisted
+ self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst))
+
+ # wait enough so that the mirror daemon restarts blocklisted instances
+ time.sleep(40)
+ rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+
+ # and we should get a new rados instance
+ self.assertTrue(rados_inst != rados_inst_new)
+
+ # along with peers that were added
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ peers_2 = set(res['peers'])
+ self.assertEqual(peers_1, peers_2)
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_stats(self):
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ # create a bunch of files in a directory to snap
+ self.mount_a.run_shell(["mkdir", "d0"])
+ self.mount_a.create_n_files('d0/file', 50, sync=True)
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # take a snapshot
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+
+ time.sleep(30)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0', 1)
+ self.verify_snapshot('d0', 'snap0')
+
+ # some more IO
+ self.mount_a.run_shell(["mkdir", "d0/d00"])
+ self.mount_a.run_shell(["mkdir", "d0/d01"])
+
+ self.mount_a.create_n_files('d0/d00/more_file', 20, sync=True)
+ self.mount_a.create_n_files('d0/d01/some_more_file', 75, sync=True)
+
+ # take another snapshot
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap1"])
+
+ time.sleep(60)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap1', 2)
+ self.verify_snapshot('d0', 'snap1')
+
+ # delete a snapshot
+ self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"])
+
+ time.sleep(10)
+ snap_list = self.mount_b.ls(path='d0/.snap')
+ self.assertTrue('snap0' not in snap_list)
+ self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 1)
+
+ # rename a snapshot
+ self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"])
+
+ time.sleep(10)
+ snap_list = self.mount_b.ls(path='d0/.snap')
+ self.assertTrue('snap1' not in snap_list)
+ self.assertTrue('snap2' in snap_list)
+ self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 1)
+
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_cancel_sync(self):
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ # create a bunch of files in a directory to snap
+ self.mount_a.run_shell(["mkdir", "d0"])
+ for i in range(8):
+ filename = f'file.{i}'
+ self.mount_a.write_n_mb(os.path.join('d0', filename), 1024)
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # take a snapshot
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+
+ time.sleep(10)
+ self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0')
+
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+
+ snap_list = self.mount_b.ls(path='d0/.snap')
+ self.assertTrue('snap0' not in snap_list)
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_restart_sync_on_blocklist(self):
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ # create a bunch of files in a directory to snap
+ self.mount_a.run_shell(["mkdir", "d0"])
+ for i in range(8):
+ filename = f'file.{i}'
+ self.mount_a.write_n_mb(os.path.join('d0', filename), 1024)
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # fetch rados address for blocklist check
+ rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+
+ # take a snapshot
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+
+ time.sleep(10)
+ self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0')
+
+ # simulate non-responding mirror daemon by sending SIGSTOP
+ pid = self.get_mirror_daemon_pid()
+ log.debug(f'SIGSTOP to cephfs-mirror pid {pid}')
+ self.mount_a.run_shell(['kill', '-SIGSTOP', pid])
+
+ # wait for blocklist timeout -- the manager module would blocklist
+ # the mirror daemon
+ time.sleep(40)
+
+ # wake up the mirror daemon -- at this point, the daemon should know
+ # that it has been blocklisted
+ log.debug('SIGCONT to cephfs-mirror')
+ self.mount_a.run_shell(['kill', '-SIGCONT', pid])
+
+ # check if the rados addr is blocklisted
+ self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst))
+
+ time.sleep(500)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1)
+ self.verify_snapshot('d0', 'snap0')
+
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_failed_sync_with_correction(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # add a non-existent directory for synchronization
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+
+ # wait for the mirror daemon to mark the directory as failed
+ time.sleep(120)
+ self.verify_failed_directory(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0')
+
+ # create the directory
+ self.mount_a.run_shell(["mkdir", "d0"])
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+
+ # wait for correction
+ time.sleep(120)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0', 1)
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_service_daemon_status(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ time.sleep(30)
+ status = self.get_mirror_daemon_status()
+
+ # assumption for this test: mirroring enabled for a single filesystem w/ single
+ # peer
+
+ # we have not added any directories
+ peer = status['filesystems'][0]['peers'][0]
+ self.assertEquals(status['filesystems'][0]['directory_count'], 0)
+ self.assertEquals(peer['stats']['failure_count'], 0)
+ self.assertEquals(peer['stats']['recovery_count'], 0)
+
+ # add a non-existent directory for synchronization -- check that it's reported
+ # in daemon stats
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+
+ time.sleep(120)
+ status = self.get_mirror_daemon_status()
+ # we added one
+ peer = status['filesystems'][0]['peers'][0]
+ self.assertEquals(status['filesystems'][0]['directory_count'], 1)
+ # failure count should be reflected
+ self.assertEquals(peer['stats']['failure_count'], 1)
+ self.assertEquals(peer['stats']['recovery_count'], 0)
+
+ # create the directory, mirror daemon would recover
+ self.mount_a.run_shell(["mkdir", "d0"])
+
+ time.sleep(120)
+ status = self.get_mirror_daemon_status()
+ peer = status['filesystems'][0]['peers'][0]
+ self.assertEquals(status['filesystems'][0]['directory_count'], 1)
+ # failure and recovery count should be reflected
+ self.assertEquals(peer['stats']['failure_count'], 1)
+ self.assertEquals(peer['stats']['recovery_count'], 1)
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_mirroring_init_failure(self):
+ """Test mirror daemon init failure"""
+
+ # disable the mgr mirroring plugin as it would try to load the dir map
+ # when mirroring is enabled for a filesystem (and throw up errors in
+ # the logs)
+ self.disable_mirroring_module()
+
+ # enable mirroring through mon interface -- this should result in the mirror daemon
+ # failing to enable mirroring due to absence of the `cephfs_mirror` index object.
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name)
+
+ with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed:
+ while proceed():
+ try:
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ if not 'state' in res:
+ return
+ self.assertTrue(res['state'] == "failed")
+ return True
+ except:
+ pass
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name)
+ time.sleep(10)
+ # verify via asok
+ try:
+ self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError('expected admin socket to be unavailable')
+
+ def test_mirroring_init_failure_with_recovery(self):
+ """Test if the mirror daemon can recover from a init failure"""
+
+        # disable the mgr mirroring plugin as it would try to load the dir map
+        # when mirroring is enabled for a filesystem (and throw up errors in
+        # the logs)
+ self.disable_mirroring_module()
+
+ # enable mirroring through mon interface -- this should result in the mirror daemon
+ # failing to enable mirroring due to absence of `cephfs_mirror` index object.
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name)
+ # need safe_while since non-failed status pops up as mirroring is restarted
+ # internally in mirror daemon.
+ with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed:
+ while proceed():
+ try:
+ # verify via asok
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+                    if 'state' not in res:
+                        return
+                    self.assertEqual(res['state'], "failed")
+                    return True
+ except:
+ pass
+
+ # create the index object and check daemon recovery
+ try:
+ p = self.mount_a.client_remote.run(args=['rados', '-p', self.fs.metadata_pool_name, 'create', 'cephfs_mirror'],
+ stdout=StringIO(), stderr=StringIO(), timeout=30,
+ check_status=True, label="create index object")
+ p.wait()
+ except CommandFailedError as ce:
+            log.warning(f'mirror daemon command to create mirror index object failed: {ce}')
+ raise
+ time.sleep(30)
+ res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+        self.assertEqual(res['peers'], {})
+        self.assertEqual(res['snap_dirs']['dir_count'], 0)
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name)
+ time.sleep(10)
+ # verify via asok
+ try:
+ self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}',
+ 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}')
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError('expected admin socket to be unavailable')
+
+ def test_cephfs_mirror_peer_bootstrap(self):
+ """Test importing peer bootstrap token"""
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # create a bootstrap token for the peer
+ bootstrap_token = self.bootstrap_peer(self.secondary_fs_name, "client.mirror_peer_bootstrap", "site-remote")
+
+ # import the peer via bootstrap token
+ self.import_peer(self.primary_fs_name, bootstrap_token)
+ time.sleep(10)
+ self.verify_peer_added(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote",
+ self.secondary_fs_name)
+
+ # verify via peer_list interface
+ peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote")
+ res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name))
+ self.assertTrue(peer_uuid in res)
+ self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '')
+
+ # remove peer
+ self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote")
+ # disable mirroring
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_symlink_sync(self):
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ # create a bunch of files w/ symbolic links in a directory to snap
+ self.mount_a.run_shell(["mkdir", "d0"])
+ self.mount_a.create_n_files('d0/file', 10, sync=True)
+ self.mount_a.run_shell(["ln", "-s", "./file_0", "d0/sym_0"])
+ self.mount_a.run_shell(["ln", "-s", "./file_1", "d0/sym_1"])
+ self.mount_a.run_shell(["ln", "-s", "./file_2", "d0/sym_2"])
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # take a snapshot
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+
+ time.sleep(30)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0', 1)
+ self.verify_snapshot('d0', 'snap0')
+
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_with_parent_snapshot(self):
+ """Test snapshot synchronization with parent directory snapshots"""
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d1/d2/d3"])
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # take a snapshot
+ self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"])
+
+ time.sleep(30)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1)
+
+ # create snapshots in parent directories
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"])
+ self.mount_a.run_shell(["mkdir", "d0/d1/.snap/snap_d1"])
+ self.mount_a.run_shell(["mkdir", "d0/d1/d2/.snap/snap_d2"])
+
+ # try syncing more snapshots
+ self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap1"])
+ time.sleep(30)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2)
+
+ self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"])
+ self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"])
+ time.sleep(15)
+ self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2)
+
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3')
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_remove_on_stall(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ # fetch rados address for blacklist check
+ rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id)
+
+ # simulate non-responding mirror daemon by sending SIGSTOP
+ pid = self.get_mirror_daemon_pid()
+ log.debug(f'SIGSTOP to cephfs-mirror pid {pid}')
+ self.mount_a.run_shell(['kill', '-SIGSTOP', pid])
+
+ # wait for blocklist timeout -- the manager module would blocklist
+ # the mirror daemon
+ time.sleep(40)
+
+ # make sure the rados addr is blocklisted
+ self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst))
+
+ # now we are sure that there are no "active" mirror daemons -- add a directory path.
+ dir_path_p = "/d0/d1"
+ dir_path = "/d0/d1/d2"
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path)
+
+ time.sleep(10)
+ # this uses an undocumented interface to get dirpath map state
+ res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path)
+ res = json.loads(res_json)
+ # there are no mirror daemons
+        self.assertEqual(res['state'], 'stalled')
+
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path)
+
+ time.sleep(10)
+ try:
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise RuntimeError('invalid errno when checking dirmap status for non-existent directory')
+ else:
+            raise RuntimeError('expected dirmap query to fail for a removed directory')
+
+ # adding a parent directory should be allowed
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p)
+
+ time.sleep(10)
+ # however, this directory path should get stalled too
+ res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p)
+ res = json.loads(res_json)
+ # there are no mirror daemons
+        self.assertEqual(res['state'], 'stalled')
+
+ # wake up the mirror daemon -- at this point, the daemon should know
+ # that it has been blocklisted
+ log.debug('SIGCONT to cephfs-mirror')
+ self.mount_a.run_shell(['kill', '-SIGCONT', pid])
+
+        # wait for the mirror daemon to restart after noticing the blocklist
+ time.sleep(60)
+ res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p)
+ res = json.loads(res_json)
+        # the mirror daemon is back up, so the directory should be mapped again
+        self.assertEqual(res['state'], 'mapped')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_incremental_sync(self):
+ """ Test incremental snapshot synchronization (based on mtime differences)."""
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ repo = 'ceph-qa-suite'
+ repo_dir = 'ceph_repo'
+ repo_path = f'{repo_dir}/{repo}'
+
+ def clone_repo():
+ self.mount_a.run_shell([
+ 'git', 'clone', '--branch', 'giant',
+ f'http://github.com/ceph/{repo}', repo_path])
+
+ def exec_git_cmd(cmd_list):
+ self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list])
+
+ self.mount_a.run_shell(["mkdir", repo_dir])
+ clone_repo()
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}')
+ self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a'])
+
+ # full copy, takes time
+ time.sleep(500)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1)
+ self.verify_snapshot(repo_path, 'snap_a')
+
+ # create some diff
+ num = random.randint(5, 20)
+ log.debug(f'resetting to HEAD~{num}')
+ exec_git_cmd(["reset", "--hard", f'HEAD~{num}'])
+
+ self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b'])
+ # incremental copy, should be fast
+ time.sleep(180)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2)
+ self.verify_snapshot(repo_path, 'snap_b')
+
+ # diff again, this time back to HEAD
+ log.debug('resetting to HEAD')
+ exec_git_cmd(["pull"])
+
+ self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_c'])
+ # incremental copy, should be fast
+ time.sleep(180)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3)
+ self.verify_snapshot(repo_path, 'snap_c')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_incremental_sync_with_type_mixup(self):
+ """ Test incremental snapshot synchronization with file type changes.
+
+        The same filename exists as a different type in each subsequent snapshot.
+        This verifies that the mirror daemon can identify the file type mismatch
+        and still sync the snapshots.
+
+ \ snap_0 snap_1 snap_2 snap_3
+ \-----------------------------------------------
+ file_x | reg sym dir reg
+ |
+ file_y | dir reg sym dir
+ |
+ file_z | sym dir reg sym
+ """
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ typs = deque(['reg', 'dir', 'sym'])
+ def cleanup_and_create_with_type(dirname, fnames):
+ self.mount_a.run_shell_payload(f"rm -rf {dirname}/*")
+ fidx = 0
+ for t in typs:
+ fname = f'{dirname}/{fnames[fidx]}'
+ log.debug(f'file: {fname} type: {t}')
+ if t == 'reg':
+ self.mount_a.run_shell(["touch", fname])
+ self.mount_a.write_file(fname, data=fname)
+ elif t == 'dir':
+ self.mount_a.run_shell(["mkdir", fname])
+ elif t == 'sym':
+ # verify ELOOP in mirror daemon
+ self.mount_a.run_shell(["ln", "-s", "..", fname])
+ fidx += 1
+
+ def verify_types(dirname, fnames, snap_name):
+ tidx = 0
+ for fname in fnames:
+ t = self.mount_b.run_shell_payload(f"stat -c %F {dirname}/.snap/{snap_name}/{fname}").stdout.getvalue().strip()
+ if typs[tidx] == 'reg':
+ self.assertEquals('regular file', t)
+ elif typs[tidx] == 'dir':
+ self.assertEquals('directory', t)
+ elif typs[tidx] == 'sym':
+ self.assertEquals('symbolic link', t)
+ tidx += 1
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ self.mount_a.run_shell(["mkdir", "d0"])
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+
+ fnames = ['file_x', 'file_y', 'file_z']
+ turns = 0
+ while turns != len(typs):
+ snapname = f'snap_{turns}'
+ cleanup_and_create_with_type('d0', fnames)
+ self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}'])
+ time.sleep(30)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', snapname, turns+1)
+ verify_types('d0', fnames, snapname)
+ # next type
+ typs.rotate(1)
+ turns += 1
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_sync_with_purged_snapshot(self):
+ """Test snapshot synchronization in midst of snapshot deletes.
+
+        Delete the previous snapshot while the mirror daemon is figuring out
+        incremental differences between the current and previous snapshot. The
+        mirror daemon should identify the purge and switch to using remote
+        comparison to sync the snapshot (in the next iteration, of course).
+ """
+
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ repo = 'ceph-qa-suite'
+ repo_dir = 'ceph_repo'
+ repo_path = f'{repo_dir}/{repo}'
+
+ def clone_repo():
+ self.mount_a.run_shell([
+ 'git', 'clone', '--branch', 'giant',
+ f'http://github.com/ceph/{repo}', repo_path])
+
+ def exec_git_cmd(cmd_list):
+ self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list])
+
+ self.mount_a.run_shell(["mkdir", repo_dir])
+ clone_repo()
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}')
+ self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a'])
+
+ # full copy, takes time
+ time.sleep(500)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1)
+ self.verify_snapshot(repo_path, 'snap_a')
+
+ # create some diff
+ num = random.randint(60, 100)
+ log.debug(f'resetting to HEAD~{num}')
+ exec_git_cmd(["reset", "--hard", f'HEAD~{num}'])
+
+ self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b'])
+
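+        # give the mirror daemon a head start on the snap_a -> snap_b delta, then purge snap_a mid-sync (see docstring)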
+ time.sleep(15)
+ self.mount_a.run_shell(['rmdir', f'{repo_path}/.snap/snap_a'])
+
+ # incremental copy but based on remote dir_root
+ time.sleep(300)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2)
+ self.verify_snapshot(repo_path, 'snap_b')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_peer_add_primary(self):
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # try adding the primary file system as a peer to secondary file
+ # system
+ try:
+ self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError('invalid errno when adding a primary file system')
+ else:
+ raise RuntimeError('adding peer should fail')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_cephfs_mirror_cancel_mirroring_and_readd(self):
+ """
+        Test adding a directory path for synchronization after removing previously
+        added directory paths, to ensure that synchronization of the newly added
+        directory path works as expected. Note that we schedule three (3)
+        directories for mirroring so that all replayer threads (3 by default) in
+        the mirror daemon are kept busy.
+ """
+ log.debug('reconfigure client auth caps')
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_b.client_id),
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', 'allow rw pool={0}, allow rw pool={1}'.format(
+ self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name()))
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ # create a bunch of files in a directory to snap
+ self.mount_a.run_shell(["mkdir", "d0"])
+ self.mount_a.run_shell(["mkdir", "d1"])
+ self.mount_a.run_shell(["mkdir", "d2"])
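+        # write large (1GiB) files so the snapshot syncs below stay in progress long enough to keep all replayer threads busy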
+ for i in range(4):
+ filename = f'file.{i}'
+ self.mount_a.write_n_mb(os.path.join('d0', filename), 1024)
+ self.mount_a.write_n_mb(os.path.join('d1', filename), 1024)
+ self.mount_a.write_n_mb(os.path.join('d2', filename), 1024)
+
+ log.debug('enabling mirroring')
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ log.debug('adding directory paths')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ # take snapshots
+ log.debug('taking snapshots')
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+ self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"])
+ self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"])
+
+ time.sleep(10)
+ log.debug('checking snap in progress')
+ self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0')
+ self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d1', 'snap0')
+ self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d2', 'snap0')
+
+ log.debug('removing directories 1')
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ log.debug('removing directories 2')
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ log.debug('removing directories 3')
+ self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d2')
+
+ log.debug('removing snapshots')
+ self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"])
+ self.mount_a.run_shell(["rmdir", "d1/.snap/snap0"])
+ self.mount_a.run_shell(["rmdir", "d2/.snap/snap0"])
+
+ for i in range(4):
+ filename = f'file.{i}'
+ log.debug(f'deleting {filename}')
+ self.mount_a.run_shell(["rm", "-f", os.path.join('d0', filename)])
+ self.mount_a.run_shell(["rm", "-f", os.path.join('d1', filename)])
+ self.mount_a.run_shell(["rm", "-f", os.path.join('d2', filename)])
+
+ log.debug('creating new files...')
+ self.mount_a.create_n_files('d0/file', 50, sync=True)
+ self.mount_a.create_n_files('d1/file', 50, sync=True)
+ self.mount_a.create_n_files('d2/file', 50, sync=True)
+
+ log.debug('adding directory paths')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1')
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2')
+
+ log.debug('creating new snapshots...')
+ self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"])
+ self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"])
+ self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"])
+
+ time.sleep(60)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d0', 'snap0', 1)
+ self.verify_snapshot('d0', 'snap0')
+
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d1', 'snap0', 1)
+ self.verify_snapshot('d1', 'snap0')
+
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/d2', 'snap0', 1)
+ self.verify_snapshot('d2', 'snap0')
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+
+ def test_local_and_remote_dir_root_mode(self):
+ log.debug('reconfigure client auth caps')
+ cid = self.mount_b.client_id
+ data_pool = self.backup_fs.get_data_pool_name()
+ self.mds_cluster.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', f"client.{cid}",
+ 'mds', 'allow rw',
+ 'mon', 'allow r',
+ 'osd', f"allow rw pool={data_pool}, allow rw pool={data_pool}")
+
+ log.debug(f'mounting filesystem {self.secondary_fs_name}')
+ self.mount_b.umount_wait()
+ self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name)
+
+ self.mount_a.run_shell(["mkdir", "l1"])
+ self.mount_a.run_shell(["mkdir", "l1/.snap/snap0"])
+ self.mount_a.run_shell(["chmod", "go-rwx", "l1"])
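+        # the mode is changed after taking the snapshot; the synced dir_root on the remote is expected to end up with the same mode (checked below)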
+
+ self.enable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.add_directory(self.primary_fs_name, self.primary_fs_id, '/l1')
+ self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name)
+
+ time.sleep(60)
+ self.check_peer_status(self.primary_fs_name, self.primary_fs_id,
+ "client.mirror_remote@ceph", '/l1', 'snap0', 1)
+
+ mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip()
+ mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip()
+
+ self.assertTrue(mode_local == mode_remote, f"mode mismatch, local mode: {mode_local}, remote mode: {mode_remote}")
+
+ self.disable_mirroring(self.primary_fs_name, self.primary_fs_id)
+ self.mount_a.run_shell(["rmdir", "l1/.snap/snap0"])
+ self.mount_a.run_shell(["rmdir", "l1"])
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
new file mode 100644
index 000000000..8b48dee69
--- /dev/null
+++ b/qa/tasks/cephfs/test_misc.py
@@ -0,0 +1,640 @@
+from io import StringIO
+
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from textwrap import dedent
+from threading import Thread
+import errno
+import platform
+import time
+import json
+import logging
+import os
+import re
+
+log = logging.getLogger(__name__)
+
+class TestMisc(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+
+ def test_statfs_on_deleted_fs(self):
+ """
+ That statfs does not cause monitors to SIGSEGV after fs deletion.
+ """
+
+ self.mount_b.umount_wait()
+ self.mount_a.run_shell_payload("stat -f .")
+ self.fs.delete_all_filesystems()
+ # This will hang either way, run in background.
+ p = self.mount_a.run_shell_payload("stat -f .", wait=False, timeout=60, check_status=False)
+ time.sleep(30)
+ self.assertFalse(p.finished)
+ # the process is stuck in uninterruptible sleep, just kill the mount
+ self.mount_a.umount_wait(force=True)
+ p.wait()
+
+ def test_fuse_mount_on_already_mounted_path(self):
+ if platform.system() != "Linux":
+ self.skipTest("Require Linux platform")
+
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client")
+
+ # Try to mount already mounted path
+ # expecting EBUSY error
+ try:
+ mount_cmd = ['sudo'] + self.mount_a._mount_bin + [self.mount_a.hostfs_mntpt]
+ self.mount_a.client_remote.run(args=mount_cmd, stderr=StringIO(),
+ stdout=StringIO(), timeout=60, omit_sudo=False)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EBUSY)
+ else:
+ self.fail("Expected EBUSY")
+
+ def test_getattr_caps(self):
+ """
+ Check if MDS recognizes the 'mask' parameter of open request.
+ The parameter allows client to request caps when opening file
+ """
+
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client")
+
+        # Enable debug. Client will request CEPH_CAP_XATTR_SHARED
+ # on lookup/open
+ self.mount_b.umount_wait()
+ self.set_conf('client', 'client debug getattr caps', 'true')
+ self.mount_b.mount_wait()
+
+ # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_*
+ # to mount_a
+ p = self.mount_a.open_background("testfile")
+ self.mount_b.wait_for_visible("testfile")
+
+ # this triggers a lookup request and an open request. The debug
+ # code will check if lookup/open reply contains xattrs
+ self.mount_b.run_shell(["cat", "testfile"])
+
+ self.mount_a.kill_background(p)
+
+ def test_root_rctime(self):
+ """
+ Check that the root inode has a non-default rctime on startup.
+ """
+
+ t = time.time()
+ rctime = self.mount_a.getfattr(".", "ceph.dir.rctime")
+ log.info("rctime = {}".format(rctime))
+ self.assertGreaterEqual(float(rctime), t - 10)
+
+ def test_fs_new(self):
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ data_pool_name = self.fs.get_data_pool_name()
+
+ self.fs.fail()
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ self.fs.metadata_pool_name,
+ self.fs.metadata_pool_name,
+ '--yes-i-really-really-mean-it')
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.fs.metadata_pool_name,
+ '--pg_num_min', str(self.fs.pg_num_min))
+
+ # insert a garbage object
+ self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar"))
+
+ def get_pool_df(fs, name):
+ try:
+ return fs.get_pool_df(name)['objects'] > 0
+ except RuntimeError:
+ return False
+
+ self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30)
+
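+        # 'fs new' must refuse to reuse a metadata pool that already contains objects unless --force is given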
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.EINVAL)
+ else:
+ raise AssertionError("Expected EINVAL")
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name, "--force")
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'fail', self.fs.name)
+
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name,
+ '--yes-i-really-mean-it')
+
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
+ self.fs.metadata_pool_name,
+ self.fs.metadata_pool_name,
+ '--yes-i-really-really-mean-it')
+ self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create',
+ self.fs.metadata_pool_name,
+ '--pg_num_min', str(self.fs.pg_num_min))
+ self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name,
+ self.fs.metadata_pool_name,
+ data_pool_name,
+ '--allow_dangerous_metadata_overlay')
+
+ def test_cap_revoke_nonresponder(self):
+ """
+ Check that a client is evicted if it has not responded to cap revoke
+ request for configured number of seconds.
+ """
+ session_timeout = self.fs.get_var("session_timeout")
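+        # use half the session timeout so that eviction by the cap-revoke timer can be distinguished from a stale-session eviction (checked below)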
+ eviction_timeout = session_timeout / 2.0
+
+ self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout',
+ str(eviction_timeout)])
+
+ cap_holder = self.mount_a.open_background()
+
+ # Wait for the file to be visible from another client, indicating
+ # that mount_a has completed its network ops
+ self.mount_b.wait_for_visible()
+
+ # Simulate client death
+ self.mount_a.suspend_netns()
+
+ try:
+ # The waiter should get stuck waiting for the capability
+ # held on the MDS by the now-dead client A
+ cap_waiter = self.mount_b.write_background()
+
+ a = time.time()
+ time.sleep(eviction_timeout)
+ cap_waiter.wait()
+ b = time.time()
+ cap_waited = b - a
+ log.info("cap_waiter waited {0}s".format(cap_waited))
+
+ # check if the cap is transferred before session timeout kicked in.
+ # this is a good enough check to ensure that the client got evicted
+ # by the cap auto evicter rather than transitioning to stale state
+ # and then getting evicted.
+ self.assertLess(cap_waited, session_timeout,
+ "Capability handover took {0}, expected less than {1}".format(
+ cap_waited, session_timeout
+ ))
+
+ self.assertTrue(self.mds_cluster.is_addr_blocklisted(
+ self.mount_a.get_global_addr()))
+ self.mount_a._kill_background(cap_holder)
+ finally:
+ self.mount_a.resume_netns()
+
+ def test_filtered_df(self):
+ pool_name = self.fs.get_data_pool_name()
+ raw_df = self.fs.get_pool_df(pool_name)
+ raw_avail = float(raw_df["max_avail"])
+ out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get',
+ pool_name, 'size',
+ '-f', 'json-pretty')
+ _ = json.loads(out)
+
+ proc = self.mount_a.run_shell(['df', '.'])
+ output = proc.stdout.getvalue()
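+        # df prints a header line followed by the data line; field 4 (index 3) is the available space in 1K blocks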
+ fs_avail = output.split('\n')[1].split()[3]
+ fs_avail = float(fs_avail) * 1024
+
+ ratio = raw_avail / fs_avail
+ assert 0.9 < ratio < 1.1
+
+ def test_dump_inode(self):
+ info = self.fs.mds_asok(['dump', 'inode', '1'])
+ assert(info['path'] == "/")
+
+ def test_dump_inode_hexademical(self):
+ self.mount_a.run_shell(["mkdir", "-p", "foo"])
+ ino = self.mount_a.path_to_ino("foo")
+ assert type(ino) is int
+ info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
+ assert info['path'] == "/foo"
+
+ def test_fs_lsflags(self):
+ """
+ Check that the lsflags displays the default state and the new state of flags
+ """
+ # Set some flags
+ self.fs.set_joinable(False)
+ self.fs.set_allow_new_snaps(False)
+ self.fs.set_allow_standby_replay(True)
+
+ lsflags = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'lsflags',
+ self.fs.name,
+ "--format=json-pretty"))
+ self.assertEqual(lsflags["joinable"], False)
+ self.assertEqual(lsflags["allow_snaps"], False)
+ self.assertEqual(lsflags["allow_multimds_snaps"], True)
+ self.assertEqual(lsflags["allow_standby_replay"], True)
+
+ def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False):
+ self.mount_a.run_shell(["mkdir", dir_path])
+
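+        # the snippet below fsyncs the directory fd (used when file_sync=True); the plain "sync" command exercises a filesystem-wide sync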
+ sync_dir_pyscript = dedent("""
+ import os
+
+ path = "{path}"
+ dfd = os.open(path, os.O_DIRECTORY)
+ os.fsync(dfd)
+ os.close(dfd)
+ """.format(path=dir_path))
+
+ # run create/delete directories and test the sync time duration
+ for i in range(300):
+ for j in range(5):
+ self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")])
+ start = time.time()
+ if file_sync:
+ self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
+ else:
+ self.mount_a.run_shell(["sync"])
+ duration = time.time() - start
+ log.info(f"sync mkdir i = {i}, duration = {duration}")
+ self.assertLess(duration, 4)
+
+ for j in range(5):
+ self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")])
+ start = time.time()
+ if file_sync:
+ self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript])
+ else:
+ self.mount_a.run_shell(["sync"])
+ duration = time.time() - start
+ log.info(f"sync rmdir i = {i}, duration = {duration}")
+ self.assertLess(duration, 4)
+
+ self.mount_a.run_shell(["rm", "-rf", dir_path])
+
+ def test_filesystem_sync_stuck_for_around_5s(self):
+ """
+        Check that a filesystem-wide sync does not get stuck waiting for the
+        mdlog to be flushed for more than about 5 seconds.
+ """
+
+ dir_path = "filesystem_sync_do_not_wait_mdlog_testdir"
+ self._test_sync_stuck_for_around_5s(dir_path)
+
+ def test_file_sync_stuck_for_around_5s(self):
+ """
+        Check that an fsync on a directory does not get stuck waiting for the
+        mdlog to be flushed for more than about 5 seconds.
+ """
+
+ dir_path = "file_sync_do_not_wait_mdlog_testdir"
+ self._test_sync_stuck_for_around_5s(dir_path, True)
+
+ def test_file_filesystem_sync_crash(self):
+ """
+        To check whether the kernel crashes when doing file/filesystem syncs
+        concurrently with directory creation and removal.
+ """
+
+ stop_thread = False
+ dir_path = "file_filesystem_sync_crash_testdir"
+ self.mount_a.run_shell(["mkdir", dir_path])
+
+ def mkdir_rmdir_thread(mount, path):
+            # stop_thread is read via closure from the enclosing test method
+
+ log.info(" mkdir_rmdir_thread starting...")
+ num = 0
+ while not stop_thread:
+ n = num
+ m = num
+ for __ in range(10):
+ mount.run_shell(["mkdir", os.path.join(path, f"{n}")])
+ n += 1
+ for __ in range(10):
+ mount.run_shell(["rm", "-rf", os.path.join(path, f"{m}")])
+ m += 1
+ num += 10
+ log.info(" mkdir_rmdir_thread stopped")
+
+ def filesystem_sync_thread(mount, path):
+            # stop_thread is read via closure from the enclosing test method
+
+ log.info(" filesystem_sync_thread starting...")
+ while not stop_thread:
+ mount.run_shell(["sync"])
+ log.info(" filesystem_sync_thread stopped")
+
+ def file_sync_thread(mount, path):
+            # stop_thread is read via closure from the enclosing test method
+
+ log.info(" file_sync_thread starting...")
+ pyscript = dedent("""
+ import os
+
+ path = "{path}"
+ dfd = os.open(path, os.O_DIRECTORY)
+ os.fsync(dfd)
+ os.close(dfd)
+ """.format(path=path))
+
+ while not stop_thread:
+ mount.run_shell(['python3', '-c', pyscript])
+ log.info(" file_sync_thread stopped")
+
+ td1 = Thread(target=mkdir_rmdir_thread, args=(self.mount_a, dir_path,))
+ td2 = Thread(target=filesystem_sync_thread, args=(self.mount_a, dir_path,))
+ td3 = Thread(target=file_sync_thread, args=(self.mount_a, dir_path,))
+
+ td1.start()
+ td2.start()
+ td3.start()
+ time.sleep(1200) # run 20 minutes
+ stop_thread = True
+ td1.join()
+ td2.join()
+ td3.join()
+ self.mount_a.run_shell(["rm", "-rf", dir_path])
+
+ def test_dump_inmemory_log_on_client_eviction(self):
+ """
+ That the in-memory logs are dumped during a client eviction event.
+ """
+ self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10'])
+ self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1'])
+ mount_a_client_id = self.mount_a.get_global_id()
+ infos = self.fs.status().get_ranks(self.fs.id)
+
+ #evict the client
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id])
+ time.sleep(10) #wait for 10 seconds for the logs dumping to complete.
+
+ #The client is evicted, so unmount it.
+ try:
+ self.mount_a.umount_wait(require_clean=True, timeout=30)
+ except:
+ pass #continue with grepping the log
+
+        eviction_log = rf"Evicting (\(and blocklisting\) )?client session {mount_a_client_id} \(.+:.+/.+\)"
+ search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p"
+ for info in infos:
+ mds_id = info['name']
+ try:
+ remote = self.fs.mon_manager.find_remote('mds', mds_id)
+ out = remote.run(args=["sed",
+ "-n",
+ "{0}".format(search_range),
+ f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"],
+ stdout=StringIO(), timeout=30)
+ except:
+ continue #continue with the next info
+ if out.stdout and re.search(eviction_log, out.stdout.getvalue().strip()):
+ return
+ self.assertTrue(False, "Failed to dump in-memory logs during client eviction")
+
+ def test_dump_inmemory_log_on_missed_beacon_ack_from_monitors(self):
+ """
+ That the in-memory logs are dumped when the mds misses beacon ACKs from monitors.
+ """
+ self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10'])
+ self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1'])
+ try:
+ mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons']
+ except:
+ self.assertTrue(False, "Error fetching monitors")
+
+ #Freeze all monitors
+ for mon in mons:
+ mon_name = mon['name']
+ log.info(f'Sending STOP to mon {mon_name}')
+ self.fs.mon_manager.signal_mon(mon_name, 19)
+
+ time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped
+
+ #Unfreeze all monitors
+ for mon in mons:
+ mon_name = mon['name']
+ log.info(f'Sending CONT to mon {mon_name}')
+ self.fs.mon_manager.signal_mon(mon_name, 18)
+
+ missed_beacon_ack_log = "missed beacon ack from the monitors"
+ search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p"
+ for info in self.fs.status().get_ranks(self.fs.id):
+ mds_id = info['name']
+ try:
+ remote = self.fs.mon_manager.find_remote('mds', mds_id)
+ out = remote.run(args=["sed",
+ "-n",
+ "{0}".format(search_range),
+ f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"],
+ stdout=StringIO(), timeout=30)
+ except:
+ continue #continue with the next info
+ if out.stdout and (missed_beacon_ack_log in out.stdout.getvalue().strip()):
+ return
+ self.assertTrue(False, "Failed to dump in-memory logs during missed beacon ack")
+
+ def test_dump_inmemory_log_on_missed_internal_heartbeats(self):
+ """
+ That the in-memory logs are dumped when the mds misses internal heartbeats.
+ """
+ self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10'])
+ self.fs.mds_asok(['config', 'set', 'mds_heartbeat_grace', '1'])
+ self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1'])
+ try:
+ mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons']
+ except:
+ self.assertTrue(False, "Error fetching monitors")
+
+ #Freeze all monitors
+ for mon in mons:
+ mon_name = mon['name']
+ log.info(f'Sending STOP to mon {mon_name}')
+ self.fs.mon_manager.signal_mon(mon_name, 19)
+
+ time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped
+
+ #Unfreeze all monitors
+ for mon in mons:
+ mon_name = mon['name']
+ log.info(f'Sending CONT to mon {mon_name}')
+ self.fs.mon_manager.signal_mon(mon_name, 18)
+
+        missed_internal_heartbeat_log = \
+            r"Skipping beacon heartbeat to monitors \(last acked .+s ago\); MDS internal heartbeat is not healthy!"
+ search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p"
+ for info in self.fs.status().get_ranks(self.fs.id):
+ mds_id = info['name']
+ try:
+ remote = self.fs.mon_manager.find_remote('mds', mds_id)
+ out = remote.run(args=["sed",
+ "-n",
+ "{0}".format(search_range),
+ f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"],
+ stdout=StringIO(), timeout=30)
+ except:
+ continue #continue with the next info
+ if out.stdout and re.search(missed_internal_heartbeat_log, out.stdout.getvalue().strip()):
+ return
+ self.assertTrue(False, "Failed to dump in-memory logs during missed internal heartbeat")
+
+ def _session_client_ls(self, cmd):
+ mount_a_client_id = self.mount_a.get_global_id()
+ info = self.fs.rank_asok(cmd)
+ mount_a_mountpoint = self.mount_a.mountpoint
+ mount_b_mountpoint = self.mount_b.mountpoint
+ self.assertIsNotNone(info)
+ for i in range(0, len(info)):
+ self.assertIn(info[i]["client_metadata"]["mount_point"],
+ [mount_a_mountpoint, mount_b_mountpoint])
+ info = self.fs.rank_asok(cmd + [f"id={mount_a_client_id}"])
+ self.assertEqual(len(info), 1)
+ self.assertEqual(info[0]["id"], mount_a_client_id)
+ self.assertEqual(info[0]["client_metadata"]["mount_point"], mount_a_mountpoint)
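+        # with --cap_dump each session entry should additionally carry a "caps" section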
+ info = self.fs.rank_asok(cmd + ['--cap_dump'])
+ for i in range(0, len(info)):
+ self.assertIn("caps", info[i])
+
+ def test_session_ls(self):
+ self._session_client_ls(['session', 'ls'])
+
+ def test_client_ls(self):
+ self._session_client_ls(['client', 'ls'])
+
+class TestCacheDrop(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+
+ def _run_drop_cache_cmd(self, timeout=None):
+ result = None
+ args = ["cache", "drop"]
+ if timeout is not None:
+ args.append(str(timeout))
+ result = self.fs.rank_tell(args)
+ return result
+
+ def _setup(self, max_caps=20, threshold=400):
+ # create some files
+ self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True)
+
+        # Reduce this so the MDS doesn't recall the maximum for simple tests
+ self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)])
+ self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)])
+
+ def test_drop_cache_command(self):
+ """
+ Basic test for checking drop cache command.
+ Confirm it halts without a timeout.
+ Note that the cache size post trimming is not checked here.
+ """
+ mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client"))
+ self._setup()
+ result = self._run_drop_cache_cmd()
+ self.assertEqual(result['client_recall']['return_code'], 0)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ # It should take at least 1 second
+ self.assertGreater(result['duration'], 1)
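+        # the MDS keeps at least mds_min_caps_per_client caps per client, so allow a margin of twice that on the 1000 files created above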
+ self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000-2*mds_min_caps_per_client)
+
+ def test_drop_cache_command_timeout(self):
+ """
+ Basic test for checking drop cache command.
+ Confirm recall halts early via a timeout.
+ Note that the cache size post trimming is not checked here.
+ """
+ self._setup()
+ result = self._run_drop_cache_cmd(timeout=10)
+ self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 10)
+ self.assertGreaterEqual(result['trim_cache']['trimmed'], 100) # we did something, right?
+
+ def test_drop_cache_command_dead_timeout(self):
+ """
+ Check drop cache command with non-responding client using tell
+ interface. Note that the cache size post trimming is not checked
+ here.
+ """
+ self._setup()
+ self.mount_a.suspend_netns()
+ # Note: recall is subject to the timeout. The journal flush will
+ # be delayed due to the client being dead.
+ result = self._run_drop_cache_cmd(timeout=5)
+ self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 5)
+ self.assertLess(result['duration'], 120)
+ # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
+ # cache now causes the Locker to drive eviction of stale clients (a
+ # stale session will be autoclosed at mdsmap['session_timeout']). The
+ # particular operation causing this is journal flush which causes the
+        # MDS to wait for cap revoke.
+ #self.assertEqual(0, result['trim_cache']['trimmed'])
+ self.mount_a.resume_netns()
+
+ def test_drop_cache_command_dead(self):
+ """
+ Check drop cache command with non-responding client using tell
+ interface. Note that the cache size post trimming is not checked
+ here.
+ """
+ self._setup()
+ self.mount_a.suspend_netns()
+ result = self._run_drop_cache_cmd()
+ self.assertEqual(result['client_recall']['return_code'], 0)
+ self.assertEqual(result['flush_journal']['return_code'], 0)
+ self.assertGreater(result['duration'], 5)
+ self.assertLess(result['duration'], 120)
+ # Note: result['trim_cache']['trimmed'] may be >0 because dropping the
+ # cache now causes the Locker to drive eviction of stale clients (a
+ # stale session will be autoclosed at mdsmap['session_timeout']). The
+ # particular operation causing this is journal flush which causes the
+        # MDS to wait for cap revoke.
+ self.mount_a.resume_netns()
+
+class TestSkipReplayInoTable(CephFSTestCase):
+ MDSS_REQUIRED = 1
+ CLIENTS_REQUIRED = 1
+
+ def test_alloc_cinode_assert(self):
+ """
+ Test alloc CInode assert.
+
+ See: https://tracker.ceph.com/issues/52280
+ """
+
+ # Create a directory and the mds will journal this and then crash
+ self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"])
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino"])
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+
+ self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"])
+ # This will make the MDS crash, since we only have one MDS in the
+        # cluster and without "wait=False" this call would get stuck here forever.
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False)
+
+ # sleep 10 seconds to make sure the journal logs are flushed and
+ # the mds crashes
+ time.sleep(10)
+
+ # Now set the mds config to skip replaying the inotable
+ self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True)
+ self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True)
+
+ self.fs.mds_restart()
+        # sleep 5 seconds to make sure the mds tell command won't get stuck
+ time.sleep(5)
+ self.fs.wait_for_daemons()
+
+        self.delete_mds_coredump(rank0['name'])
+
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"])
+
+ ls_out = set(self.mount_a.ls("test_alloc_ino/"))
+ self.assertEqual(ls_out, set({"dir1", "dir2"}))
diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py
new file mode 100644
index 000000000..c9ea5f528
--- /dev/null
+++ b/qa/tasks/cephfs/test_multifs_auth.py
@@ -0,0 +1,297 @@
+"""
+Test for Ceph clusters with multiple FSs.
+"""
+import logging
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.caps_helper import CapTester
+
+from teuthology.exceptions import CommandFailedError
+
+
+log = logging.getLogger(__name__)
+
+
+class TestMultiFS(CephFSTestCase):
+ client_id = 'testuser'
+ client_name = 'client.' + client_id
+ # one dedicated for each FS
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 2
+
+ def setUp(self):
+ super(TestMultiFS, self).setUp()
+
+ self.captester = CapTester()
+
+        # the client might already exist if the same cluster was used for a
+        # previous vstart_runner.py run.
+ self.run_cluster_cmd(f'auth rm {self.client_name}')
+
+ self.fs1 = self.fs
+ self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True)
+
+ # we'll reassign caps to client.1 so that it can operate with cephfs2
+ self.run_cluster_cmd(f'auth caps client.{self.mount_b.client_id} mon '
+ f'"allow r" osd "allow rw '
+ f'pool={self.fs2.get_data_pool_name()}" mds allow')
+ self.mount_b.remount(cephfs_name=self.fs2.name)
+
+
+class TestMONCaps(TestMultiFS):
+
+ def test_moncap_with_one_fs_names(self):
+ moncap = f'allow r fsname={self.fs1.name}'
+ self.create_client(self.client_id, moncap)
+
+ self.captester.run_mon_cap_tests(self.fs1, self.client_id)
+
+ def test_moncap_with_multiple_fs_names(self):
+ moncap = (f'allow r fsname={self.fs1.name}, '
+ f'allow r fsname={self.fs2.name}')
+ self.create_client(self.client_id, moncap)
+
+ self.captester.run_mon_cap_tests(self.fs1, self.client_id)
+
+ def test_moncap_with_blanket_allow(self):
+ moncap = 'allow r'
+ self.create_client(self.client_id, moncap)
+
+ self.captester.run_mon_cap_tests(self.fs1, self.client_id)
+
+
+#TODO: add tests for capsecs 'p' and 's'.
+class TestMDSCaps(TestMultiFS):
+ """
+ 0. Have 2 FSs on Ceph cluster.
+ 1. Create new files on both FSs.
+ 2. Create a new client that has authorization for both FSs.
+ 3. Remount the current mounts with this new client.
+ 4. Test read and write on both FSs.
+ """
+ def setUp(self):
+        super(TestMDSCaps, self).setUp()
+ self.mounts = (self.mount_a, self.mount_b)
+
+ def test_rw_with_fsname_and_no_path_in_cap(self):
+ PERM = 'rw'
+ self.captester.write_test_files(self.mounts)
+ keyring_paths = self._create_client(PERM, fsname=True)
+ self.remount_with_new_client(keyring_paths)
+
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_r_with_fsname_and_no_path_in_cap(self):
+ PERM = 'r'
+ self.captester.write_test_files(self.mounts)
+ keyring_paths = self._create_client(PERM, fsname=True)
+ self.remount_with_new_client(keyring_paths)
+
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_rw_with_fsname_and_path_in_cap(self):
+ PERM, CEPHFS_MNTPT = 'rw', 'dir1'
+ self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.captester.write_test_files(self.mounts, CEPHFS_MNTPT)
+ keyring_paths = self._create_client(PERM, fsname=True)
+ self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT)
+
+ self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT)
+
+ def test_r_with_fsname_and_path_in_cap(self):
+ PERM, CEPHFS_MNTPT = 'r', 'dir1'
+ self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.captester.write_test_files(self.mounts, CEPHFS_MNTPT)
+ keyring_paths = self._create_client(PERM, fsname=True)
+ self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT)
+
+ self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT)
+
+ # XXX: this tests the backward compatibility; "allow rw path=<dir1>" is
+ # treated as "allow rw fsname=* path=<dir1>"
+ def test_rw_with_no_fsname_and_path_in_cap(self):
+ PERM, CEPHFS_MNTPT = 'rw', 'dir1'
+ self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.captester.write_test_files(self.mounts, CEPHFS_MNTPT)
+ keyring_paths = self._create_client(PERM)
+ self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT)
+
+ self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT)
+
+ # XXX: this tests the backward compatibility; "allow r path=<dir1>" is
+ # treated as "allow r fsname=* path=<dir1>"
+ def test_r_with_no_fsname_and_path_in_cap(self):
+ PERM, CEPHFS_MNTPT = 'r', 'dir1'
+ self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}')
+ self.captester.write_test_files(self.mounts, CEPHFS_MNTPT)
+ keyring_paths = self._create_client(PERM)
+ self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT)
+
+ self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT)
+
+ def test_rw_with_no_fsname_and_no_path(self):
+ PERM = 'rw'
+ self.captester.write_test_files(self.mounts)
+ keyring_paths = self._create_client(PERM)
+ self.remount_with_new_client(keyring_paths)
+
+ self.captester.run_mds_cap_tests(PERM)
+
+ def test_r_with_no_fsname_and_no_path(self):
+ PERM = 'r'
+ self.captester.write_test_files(self.mounts)
+ keyring_paths = self._create_client(PERM)
+ self.remount_with_new_client(keyring_paths)
+
+ self.captester.run_mds_cap_tests(PERM)
+
+ def tearDown(self):
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+        super(TestMDSCaps, self).tearDown()
+
+ def generate_caps(self, perm, fsname, cephfs_mntpt):
+ moncap = 'allow r'
+ osdcap = (f'allow {perm} tag cephfs data={self.fs1.name}, '
+ f'allow {perm} tag cephfs data={self.fs2.name}')
+
+ if fsname:
+ if cephfs_mntpt == '/':
+ mdscap = (f'allow {perm} fsname={self.fs1.name}, '
+ f'allow {perm} fsname={self.fs2.name}')
+ else:
+ mdscap = (f'allow {perm} fsname={self.fs1.name} '
+ f'path=/{cephfs_mntpt}, '
+ f'allow {perm} fsname={self.fs2.name} '
+ f'path=/{cephfs_mntpt}')
+ else:
+ if cephfs_mntpt == '/':
+ mdscap = f'allow {perm}'
+ else:
+ mdscap = f'allow {perm} path=/{cephfs_mntpt}'
+
+ return moncap, osdcap, mdscap
+
+ def _create_client(self, perm, fsname=False, cephfs_mntpt='/'):
+ moncap, osdcap, mdscap = self.generate_caps(perm, fsname,
+ cephfs_mntpt)
+
+ keyring = self.create_client(self.client_id, moncap, osdcap, mdscap)
+ keyring_paths = []
+ for mount_x in self.mounts:
+ keyring_paths.append(mount_x.client_remote.mktemp(data=keyring))
+
+ return keyring_paths
+
+ def remount_with_new_client(self, keyring_paths, cephfs_mntpt='/'):
+        if isinstance(cephfs_mntpt, str) and cephfs_mntpt != '/':
+ cephfs_mntpt = '/' + cephfs_mntpt
+
+ self.mount_a.remount(client_id=self.client_id,
+ client_keyring_path=keyring_paths[0],
+ client_remote=self.mount_a.client_remote,
+ cephfs_name=self.fs1.name,
+ cephfs_mntpt=cephfs_mntpt,
+ hostfs_mntpt=self.mount_a.hostfs_mntpt,
+ wait=True)
+ self.mount_b.remount(client_id=self.client_id,
+ client_keyring_path=keyring_paths[1],
+ client_remote=self.mount_b.client_remote,
+ cephfs_name=self.fs2.name,
+ cephfs_mntpt=cephfs_mntpt,
+ hostfs_mntpt=self.mount_b.hostfs_mntpt,
+ wait=True)
+
+
+class TestClientsWithoutAuth(TestMultiFS):
+
+ def setUp(self):
+ super(TestClientsWithoutAuth, self).setUp()
+
+ # TODO: When MON and OSD caps for a Ceph FS are assigned to a
+ # client but MDS caps are not, mount.ceph prints "permission
+ # denied". But when MON caps are not assigned and MDS and OSD
+ # caps are, mount.ceph prints "no mds server or cluster laggy"
+ # instead of "permission denied".
+ #
+ # Before uncommenting the following line a fix would be required
+ # for latter case to change "no mds server is up or the cluster is
+ # laggy" to "permission denied".
+ self.kernel_errmsgs = ('permission denied', 'no mds server is up or '
+ 'the cluster is laggy', 'no such file or '
+ 'directory',
+ 'input/output error')
+
+ # TODO: When MON and OSD caps are assigned for a Ceph FS to a
+ # client but MDS caps are not, ceph-fuse prints "operation not
+ # permitted". But when MON caps are not assigned and MDS and OSD
+ # caps are, ceph-fuse prints "no such file or directory" instead
+ # of "operation not permitted".
+ #
+ # Before uncommenting the following line a fix would be required
+ # for the latter case to change "no such file or directory" to
+ # "operation not permitted".
+ #self.assertIn('operation not permitted', retval[2].lower())
+ self.fuse_errmsgs = ('operation not permitted', 'no such file or '
+ 'directory')
+
+ if 'kernel' in str(type(self.mount_a)).lower():
+ self.errmsgs = self.kernel_errmsgs
+ elif 'fuse' in str(type(self.mount_a)).lower():
+ self.errmsgs = self.fuse_errmsgs
+ else:
+ raise RuntimeError('strange, the client was neither based on '
+ 'kernel nor FUSE.')
+
+ def check_that_mount_failed_for_right_reason(self, stderr):
+ stderr = stderr.lower()
+ for errmsg in self.errmsgs:
+ if errmsg in stderr:
+ break
+ else:
+ raise AssertionError('can\'t find expected set of words in the '
+ f'stderr\nself.errmsgs - {self.errmsgs}\n'
+ f'stderr - {stderr}')
+
+ def test_mount_all_caps_absent(self):
+ # setup part...
+ keyring = self.fs1.authorize(self.client_id, ('/', 'rw'))
+ keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
+
+ # mount the FS for which client has no auth...
+ retval = self.mount_a.remount(client_id=self.client_id,
+ client_keyring_path=keyring_path,
+ cephfs_name=self.fs2.name,
+ check_status=False)
+
+ # tests...
+ self.assertIsInstance(retval, tuple)
+ self.assertEqual(len(retval), 3)
+ self.assertIsInstance(retval[0], CommandFailedError)
+ self.check_that_mount_failed_for_right_reason(retval[2])
+
+ def test_mount_mon_and_osd_caps_present_mds_caps_absent(self):
+ # setup part...
+ moncap = f'allow rw fsname={self.fs1.name}, allow rw fsname={self.fs2.name}'
+ mdscap = f'allow rw fsname={self.fs1.name}'
+ osdcap = (f'allow rw tag cephfs data={self.fs1.name}, allow rw tag '
+ f'cephfs data={self.fs2.name}')
+ keyring = self.create_client(self.client_id, moncap, osdcap, mdscap)
+ keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
+
+ # mount the FS for which client has no auth...
+ retval = self.mount_a.remount(client_id=self.client_id,
+ client_keyring_path=keyring_path,
+ cephfs_name=self.fs2.name,
+ check_status=False)
+
+ # tests...
+ self.assertIsInstance(retval, tuple)
+ self.assertEqual(len(retval), 3)
+ self.assertIsInstance(retval[0], CommandFailedError)
+ self.check_that_mount_failed_for_right_reason(retval[2])
diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py
new file mode 100644
index 000000000..2bb6257c7
--- /dev/null
+++ b/qa/tasks/cephfs/test_multimds_misc.py
@@ -0,0 +1,223 @@
+import logging
+import errno
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.contextutil import safe_while
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+class TestScrub2(CephFSTestCase):
+ MDSS_REQUIRED = 3
+ CLIENTS_REQUIRED = 1
+
+ def _check_scrub_status(self, result=None, reverse=False):
+ self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=1,
+ sleep=5, timeout=30,
+ reverse=reverse), True)
+ self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=2,
+ sleep=5, timeout=30,
+ reverse=reverse), True)
+ self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=0,
+ sleep=5, timeout=30,
+ reverse=reverse), True)
+
+ def _check_task_status_na(self, timo=120):
+ """ check absence of scrub status in ceph status """
+        with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
+ while proceed():
+ active = self.fs.get_active_names()
+ log.debug("current active={0}".format(active))
+ task_status = self.fs.get_task_status("scrub status")
+                if active[0] not in task_status:
+ return True
+
+ def _check_task_status(self, expected_status, timo=120):
+ """ check scrub status for current active mds in ceph status """
+        with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
+ while proceed():
+ active = self.fs.get_active_names()
+ log.debug("current active={0}".format(active))
+ task_status = self.fs.get_task_status("scrub status")
+ try:
+ if task_status[active[0]].startswith(expected_status):
+ return True
+ except KeyError:
+ pass
+
+ def _find_path_inos(self, root_path):
+ inos = []
+ p = self.mount_a.run_shell(["find", root_path])
+ paths = p.stdout.getvalue().strip().split()
+ for path in paths:
+ inos.append(self.mount_a.path_to_ino(path))
+ return inos
+
+ def _setup_subtrees(self):
+ self.fs.set_max_mds(3)
+ self.fs.wait_for_daemons()
+ status = self.fs.status()
+
+ path = 'd1/d2/d3/d4/d5/d6/d7/d8'
+ self.mount_a.run_shell(['mkdir', '-p', path])
+ self.mount_a.run_shell(['sync', path])
+
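+        # Pin nested directories to ranks 0, 1 and 2 so the tree is split
+        # across all three active MDSs.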
+ self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2")
+
+ self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0)
+ self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1)
+ self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2)
+
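+        # Flush each rank's journal so the new metadata (including backtraces)
+        # is persisted to RADOS before the tests manipulate objects directly.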
+ for rank in range(3):
+ self.fs.rank_tell(["flush", "journal"], rank)
+
+ def test_apply_tag(self):
+ self._setup_subtrees()
+ inos = self._find_path_inos('d1/d2/d3/')
+
+ tag = "tag123"
+ out_json = self.fs.rank_tell(["tag", "path", "/d1/d2/d3", tag], 0)
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ def assertTagged(ino):
+ file_obj_name = "{0:x}.00000000".format(ino)
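+            # getxattr raises CommandFailedError if the scrub did not tag
+            # this object, which fails the test.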
+ self.fs.radosm(["getxattr", file_obj_name, "scrub_tag"])
+
+ for ino in inos:
+ assertTagged(ino)
+
+ def test_scrub_backtrace(self):
+ self._setup_subtrees()
+ inos = self._find_path_inos('d1/d2/d3/')
+
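+        # Remove the backtrace ("parent") xattr from each file's first data
+        # object to simulate backtrace damage.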
+ for ino in inos:
+ file_obj_name = "{0:x}.00000000".format(ino)
+ self.fs.radosm(["rmxattr", file_obj_name, "parent"])
+
+ out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0)
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ def _check_damage(mds_rank, inos):
+ all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank)
+ damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"]
+ return len(damage) >= len(inos)
+
+ self.assertTrue(_check_damage(0, inos[0:2]))
+ self.assertTrue(_check_damage(1, inos[2:4]))
+ self.assertTrue(_check_damage(2, inos[4:6]))
+
+ def test_scrub_non_mds0(self):
+ self._setup_subtrees()
+
+ def expect_exdev(cmd, mds):
+ try:
+ self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd)
+ except CommandFailedError as e:
+ if e.exitstatus == errno.EXDEV:
+ pass
+ else:
+ raise
+ else:
+ raise RuntimeError("expected failure")
+
+ rank1 = self.fs.get_rank(rank=1)
+ expect_exdev(["scrub", "start", "/d1/d2/d3"], rank1["name"])
+ expect_exdev(["scrub", "abort"], rank1["name"])
+ expect_exdev(["scrub", "pause"], rank1["name"])
+ expect_exdev(["scrub", "resume"], rank1["name"])
+
+ def test_scrub_abort_mds0(self):
+ self._setup_subtrees()
+
+ inos = self._find_path_inos('d1/d2/d3/')
+
+ for ino in inos:
+ file_obj_name = "{0:x}.00000000".format(ino)
+ self.fs.radosm(["rmxattr", file_obj_name, "parent"])
+
+ out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0)
+ self.assertNotEqual(out_json, None)
+
+ res = self.fs.run_scrub(["abort"])
+ self.assertEqual(res['return_code'], 0)
+
+        # Abort and verify on all active MDSs. We also check the status on the
+        # rank 0 MDS because it is supposed to gather the scrub status from the
+        # other MDSs.
+ self._check_scrub_status()
+
+        # wait until the updated task status has been reported
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
+
+ def test_scrub_pause_and_resume_mds0(self):
+ self._setup_subtrees()
+
+ inos = self._find_path_inos('d1/d2/d3/')
+
+ for ino in inos:
+ file_obj_name = "{0:x}.00000000".format(ino)
+ self.fs.radosm(["rmxattr", file_obj_name, "parent"])
+
+ out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0)
+ self.assertNotEqual(out_json, None)
+
+ res = self.fs.run_scrub(["pause"])
+ self.assertEqual(res['return_code'], 0)
+
+ self._check_scrub_status(result="PAUSED")
+
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # resume and verify
+ res = self.fs.run_scrub(["resume"])
+ self.assertEqual(res['return_code'], 0)
+
+ self._check_scrub_status(result="PAUSED", reverse=True)
+
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
+
+ def test_scrub_pause_and_resume_with_abort_mds0(self):
+ self._setup_subtrees()
+
+ inos = self._find_path_inos('d1/d2/d3/')
+
+ for ino in inos:
+ file_obj_name = "{0:x}.00000000".format(ino)
+ self.fs.radosm(["rmxattr", file_obj_name, "parent"])
+
+ out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0)
+ self.assertNotEqual(out_json, None)
+
+ res = self.fs.run_scrub(["pause"])
+ self.assertEqual(res['return_code'], 0)
+
+ self._check_scrub_status(result="PAUSED")
+
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ res = self.fs.run_scrub(["abort"])
+ self.assertEqual(res['return_code'], 0)
+
+ self._check_scrub_status(result="PAUSED")
+ self._check_scrub_status(result="0 inodes")
+
+ # scrub status should still be paused...
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # resume and verify
+ res = self.fs.run_scrub(["resume"])
+ self.assertEqual(res['return_code'], 0)
+
+ self._check_scrub_status(result="PAUSED", reverse=True)
+
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
diff --git a/qa/tasks/cephfs/test_newops.py b/qa/tasks/cephfs/test_newops.py
new file mode 100644
index 000000000..0071cb5d3
--- /dev/null
+++ b/qa/tasks/cephfs/test_newops.py
@@ -0,0 +1,18 @@
+import logging
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestNewOps(CephFSTestCase):
+ def test_newops_getvxattr(self):
+ """
+        Nautilus MDSs crash when they receive unknown ops; as a workaround,
+        clients should avoid sending such ops to nautilus
+ """
+
+ log.info("Test for new getvxattr op...")
+ self.mount_a.run_shell(["mkdir", "newop_getvxattr_dir"])
+
+        # test that the new getvxattr op does not crash the MDSs
+ self.mount_a.getfattr("./newop_getvxattr_dir", "ceph.dir.pin.random")
+ log.info("Test for new getvxattr op succeeds")
diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py
new file mode 100644
index 000000000..0a10709e6
--- /dev/null
+++ b/qa/tasks/cephfs/test_nfs.py
@@ -0,0 +1,880 @@
+# NOTE: these tests are not yet compatible with vstart_runner.py.
+import errno
+import json
+import time
+import logging
+from io import BytesIO, StringIO
+
+from tasks.mgr.mgr_test_case import MgrTestCase
+from teuthology import contextutil
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+NFS_POOL_NAME = '.nfs' # should match mgr_module.py
+
+# TODO Add test for cluster update when ganesha can be deployed on multiple ports.
+class TestNFS(MgrTestCase):
+ def _cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args)
+
+ def _nfs_cmd(self, *args):
+ return self._cmd("nfs", *args)
+
+ def _nfs_complete_cmd(self, cmd):
+ return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}",
+ stdout=StringIO(),
+ stderr=StringIO(),
+ check_status=False)
+
+ def _orch_cmd(self, *args):
+ return self._cmd("orch", *args)
+
+ def _sys_cmd(self, cmd):
+ ret = self.ctx.cluster.run(args=cmd, check_status=False, stdout=BytesIO(), stderr=BytesIO())
+ stdout = ret[0].stdout
+ if stdout:
+ return stdout.getvalue()
+
+ def setUp(self):
+ super(TestNFS, self).setUp()
+ self._load_module('nfs')
+ self.cluster_id = "test"
+ self.export_type = "cephfs"
+ self.pseudo_path = "/cephfs"
+ self.path = "/"
+ self.fs_name = "nfs-cephfs"
+ self.expected_name = "nfs.test"
+ self.sample_export = {
+ "export_id": 1,
+ "path": self.path,
+ "cluster_id": self.cluster_id,
+ "pseudo": self.pseudo_path,
+ "access_type": "RW",
+ "squash": "none",
+ "security_label": True,
+ "protocols": [
+ 4
+ ],
+ "transports": [
+ "TCP"
+ ],
+ "fsal": {
+ "name": "CEPH",
+ "user_id": "nfs.test.1",
+ "fs_name": self.fs_name,
+ },
+ "clients": []
+ }
+
+ def _check_nfs_server_status(self):
+ res = self._sys_cmd(['sudo', 'systemctl', 'status', 'nfs-server'])
+ if isinstance(res, bytes) and b'Active: active' in res:
+ self._disable_nfs()
+
+ def _disable_nfs(self):
+ log.info("Disabling NFS")
+ self._sys_cmd(['sudo', 'systemctl', 'disable', 'nfs-server', '--now'])
+
+ def _fetch_nfs_daemons_details(self, enable_json=False):
+ args = ('ps', f'--service_name={self.expected_name}')
+ if enable_json:
+ args = (*args, '--format=json')
+ return self._orch_cmd(*args)
+
+ def _check_nfs_cluster_event(self, expected_event):
+ '''
+        Check whether an event occurred during the lifetime of the NFS service
+ :param expected_event: event that was expected to occur
+ '''
+ event_occurred = False
+        # Wait a few seconds for the NFS daemons' status to be updated
+ with contextutil.safe_while(sleep=10, tries=18, _raise=False) as proceed:
+ while not event_occurred and proceed():
+ daemons_details = json.loads(
+ self._fetch_nfs_daemons_details(enable_json=True))
+ log.info('daemons details %s', daemons_details)
+ # 'events' key may not exist in the daemon description
+ # after a mgr fail over and could take some time to appear
+ # (it's populated on first daemon event)
+ if 'events' not in daemons_details[0]:
+ continue
+ for event in daemons_details[0]['events']:
+ log.info('daemon event %s', event)
+ if expected_event in event:
+ event_occurred = True
+ break
+ return event_occurred
+
+ def _check_nfs_cluster_status(self, expected_status, fail_msg):
+ '''
+ Check the current status of the NFS service
+ :param expected_status: Status to be verified
+ :param fail_msg: Message to be printed if test failed
+ '''
+ # Wait for a minute as ganesha daemon takes some time to be
+ # deleted/created
+ with contextutil.safe_while(sleep=6, tries=10, _raise=False) as proceed:
+ while proceed():
+ if expected_status in self._fetch_nfs_daemons_details():
+ return
+ self.fail(fail_msg)
+
+ def _check_auth_ls(self, export_id=1, check_in=False):
+ '''
+ Tests export user id creation or deletion.
+ :param export_id: Denotes export number
+ :param check_in: Check specified export id
+ '''
+ output = self._cmd('auth', 'ls')
+ client_id = f'client.nfs.{self.cluster_id}'
+ if check_in:
+ self.assertIn(f'{client_id}.{export_id}', output)
+ else:
+ self.assertNotIn(f'{client_id}.{export_id}', output)
+
+ def _test_idempotency(self, cmd_func, cmd_args):
+ '''
+        Test idempotency of commands. It first runs the TestNFS test method
+        for a command and then checks the result of running the command again.
+        The TestNFS test method contains the checks required to verify that
+        the command works.
+ :param cmd_func: TestNFS method
+ :param cmd_args: nfs command arguments to be run
+ '''
+ cmd_func()
+ ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args)
+ if ret != 0:
+ self.fail("Idempotency test failed")
+
+ def _test_create_cluster(self):
+ '''
+ Test single nfs cluster deployment.
+ '''
+ with contextutil.safe_while(sleep=4, tries=10) as proceed:
+ while proceed():
+ try:
+ # Disable any running nfs ganesha daemon
+ self._check_nfs_server_status()
+ cluster_create = self._nfs_complete_cmd(
+ f'cluster create {self.cluster_id}')
+ if cluster_create.stderr and 'cluster already exists' \
+ in cluster_create.stderr.getvalue():
+ self._test_delete_cluster()
+ continue
+ # Check for expected status and daemon name
+ # (nfs.<cluster_id>)
+ self._check_nfs_cluster_status(
+ 'running', 'NFS Ganesha cluster deployment failed')
+ break
+ except (AssertionError, CommandFailedError) as e:
+ log.warning(f'{e}, retrying')
+
+ def _test_delete_cluster(self):
+ '''
+ Test deletion of a single nfs cluster.
+ '''
+ self._nfs_cmd('cluster', 'rm', self.cluster_id)
+ self._check_nfs_cluster_status('No daemons reported',
+ 'NFS Ganesha cluster could not be deleted')
+
+ def _test_list_cluster(self, empty=False):
+ '''
+ Test listing of deployed nfs clusters. If nfs cluster is deployed then
+ it checks for expected cluster id. Otherwise checks nothing is listed.
+ :param empty: If true it denotes no cluster is deployed.
+ '''
+ nfs_output = self._nfs_cmd('cluster', 'ls')
+ jdata = json.loads(nfs_output)
+ if empty:
+ self.assertEqual(len(jdata), 0)
+ else:
+ cluster_id = self.cluster_id
+ self.assertEqual([cluster_id], jdata)
+
+ def _create_export(self, export_id, create_fs=False, extra_cmd=None):
+ '''
+ Test creation of a single export.
+ :param export_id: Denotes export number
+        :param create_fs: If False, the filesystem already exists; otherwise create it.
+ :param extra_cmd: List of extra arguments for creating export.
+ '''
+ if create_fs:
+ self._cmd('fs', 'volume', 'create', self.fs_name)
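+            # Wait for the MDS service of the newly created volume to be
+            # running before creating the export.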
+ with contextutil.safe_while(sleep=5, tries=30) as proceed:
+ while proceed():
+ output = self._cmd(
+ 'orch', 'ls', '-f', 'json',
+ '--service-name', f'mds.{self.fs_name}'
+ )
+ j = json.loads(output)
+ if j[0]['status']['running']:
+ break
+ export_cmd = ['nfs', 'export', 'create', 'cephfs',
+ '--fsname', self.fs_name, '--cluster-id', self.cluster_id]
+ if isinstance(extra_cmd, list):
+ export_cmd.extend(extra_cmd)
+ else:
+ export_cmd.extend(['--pseudo-path', self.pseudo_path])
+ # Runs the nfs export create command
+ self._cmd(*export_cmd)
+ # Check if user id for export is created
+ self._check_auth_ls(export_id, check_in=True)
+ res = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'get',
+ f'export-{export_id}', '-'])
+ # Check if export object is created
+ if res == b'':
+ self.fail("Export cannot be created")
+
+ def _create_default_export(self):
+ '''
+ Deploy a single nfs cluster and create export with default options.
+ '''
+ self._test_create_cluster()
+ self._create_export(export_id='1', create_fs=True)
+
+ def _delete_export(self):
+ '''
+ Delete an export.
+ '''
+ self._nfs_cmd('export', 'rm', self.cluster_id, self.pseudo_path)
+ self._check_auth_ls()
+
+ def _test_list_export(self):
+ '''
+ Test listing of created exports.
+ '''
+ nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id))
+ self.assertIn(self.pseudo_path, nfs_output)
+
+ def _test_list_detailed(self, sub_vol_path):
+ '''
+ Test listing of created exports with detailed option.
+ :param sub_vol_path: Denotes path of subvolume
+ '''
+ nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id, '--detailed'))
+        # Export-1 with default values (access type = rw and path = '/')
+ self.assertDictEqual(self.sample_export, nfs_output[0])
+ # Export-2 with r only
+ self.sample_export['export_id'] = 2
+ self.sample_export['pseudo'] = self.pseudo_path + '1'
+ self.sample_export['access_type'] = 'RO'
+ self.sample_export['fsal']['user_id'] = f'{self.expected_name}.2'
+ self.assertDictEqual(self.sample_export, nfs_output[1])
+ # Export-3 for subvolume with r only
+ self.sample_export['export_id'] = 3
+ self.sample_export['path'] = sub_vol_path
+ self.sample_export['pseudo'] = self.pseudo_path + '2'
+ self.sample_export['fsal']['user_id'] = f'{self.expected_name}.3'
+ self.assertDictEqual(self.sample_export, nfs_output[2])
+ # Export-4 for subvolume
+ self.sample_export['export_id'] = 4
+ self.sample_export['pseudo'] = self.pseudo_path + '3'
+ self.sample_export['access_type'] = 'RW'
+ self.sample_export['fsal']['user_id'] = f'{self.expected_name}.4'
+ self.assertDictEqual(self.sample_export, nfs_output[3])
+
+ def _get_export(self):
+ '''
+ Returns export block in json format
+ '''
+ return json.loads(self._nfs_cmd('export', 'info', self.cluster_id, self.pseudo_path))
+
+ def _test_get_export(self):
+ '''
+ Test fetching of created export.
+ '''
+ nfs_output = self._get_export()
+ self.assertDictEqual(self.sample_export, nfs_output)
+
+ def _check_export_obj_deleted(self, conf_obj=False):
+ '''
+ Test if export or config object are deleted successfully.
+ :param conf_obj: It denotes config object needs to be checked
+ '''
+ rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls'])
+
+ if b'export-' in rados_obj_ls or (conf_obj and b'conf-nfs' in rados_obj_ls):
+ self.fail("Delete export failed")
+
+ def _get_port_ip_info(self):
+ '''
+ Return port and ip for a cluster
+ '''
+ #{'test': {'backend': [{'hostname': 'smithi068', 'ip': '172.21.15.68',
+ #'port': 2049}]}}
+ with contextutil.safe_while(sleep=5, tries=6) as proceed:
+ while proceed():
+ try:
+ info_output = json.loads(
+ self._nfs_cmd('cluster', 'info',
+ self.cluster_id))['test']['backend'][0]
+ return info_output["port"], info_output["ip"]
+ except (IndexError, CommandFailedError) as e:
+ if 'list index out of range' in str(e):
+ log.warning('no port and/or ip found, retrying')
+ else:
+ log.warning(f'{e}, retrying')
+
+ def _test_mnt(self, pseudo_path, port, ip, check=True):
+ '''
+ Test mounting of created exports
+ :param pseudo_path: It is the pseudo root name
+ :param port: Port of deployed nfs cluster
+ :param ip: IP of deployed nfs cluster
+ :param check: It denotes if i/o testing needs to be done
+ '''
+ tries = 3
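+        # Retry the mount a few times; the NFS service may not be ready to
+        # serve yet.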
+ while True:
+ try:
+ self.ctx.cluster.run(
+ args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}',
+ f'{ip}:{pseudo_path}', '/mnt'])
+ break
+ except CommandFailedError as e:
+ if tries:
+ tries -= 1
+ time.sleep(2)
+ continue
+                # Mount failure is expected only when a non-existent pseudo path is passed
+ if not check and e.exitstatus == 32:
+ return
+ raise
+
+ self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt'])
+
+ try:
+ self.ctx.cluster.run(args=['touch', '/mnt/test'])
+ out_mnt = self._sys_cmd(['ls', '/mnt'])
+ self.assertEqual(out_mnt, b'test\n')
+ finally:
+ self.ctx.cluster.run(args=['sudo', 'umount', '/mnt'])
+
+ def _write_to_read_only_export(self, pseudo_path, port, ip):
+ '''
+ Check if write to read only export fails
+ '''
+ try:
+ self._test_mnt(pseudo_path, port, ip)
+ except CommandFailedError as e:
+ # Write to cephfs export should fail for test to pass
+ self.assertEqual(
+ e.exitstatus, errno.EPERM,
+ 'invalid error code on trying to write to read-only export')
+ else:
+ self.fail('expected write to a read-only export to fail')
+
+ def _create_cluster_with_fs(self, fs_name, mnt_pt=None):
+ """
+ create a cluster along with fs and mount it to the path supplied
+ :param fs_name: name of CephFS volume to be created
+ :param mnt_pt: mount fs to the path
+ """
+ self._test_create_cluster()
+ self._cmd('fs', 'volume', 'create', fs_name)
+ with contextutil.safe_while(sleep=5, tries=30) as proceed:
+ while proceed():
+ output = self._cmd(
+ 'orch', 'ls', '-f', 'json',
+ '--service-name', f'mds.{fs_name}'
+ )
+ j = json.loads(output)
+ if j[0]['status']['running']:
+ break
+ if mnt_pt:
+ with contextutil.safe_while(sleep=3, tries=3) as proceed:
+ while proceed():
+ try:
+ self.ctx.cluster.run(args=['sudo', 'ceph-fuse', mnt_pt])
+ break
+ except CommandFailedError as e:
+ log.warning(f'{e}, retrying')
+ self.ctx.cluster.run(args=['sudo', 'chmod', '1777', mnt_pt])
+
+ def _delete_cluster_with_fs(self, fs_name, mnt_pt=None, mode=None):
+ """
+ delete cluster along with fs and unmount it from the path supplied
+ :param fs_name: name of CephFS volume to be deleted
+ :param mnt_pt: unmount fs from the path
+ :param mode: revert to this mode
+ """
+ if mnt_pt:
+ self.ctx.cluster.run(args=['sudo', 'umount', mnt_pt])
+ if mode:
+ if isinstance(mode, bytes):
+ mode = mode.decode().strip()
+ self.ctx.cluster.run(args=['sudo', 'chmod', mode, mnt_pt])
+ self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it')
+ self._test_delete_cluster()
+
+ def test_create_and_delete_cluster(self):
+ '''
+ Test successful creation and deletion of the nfs cluster.
+ '''
+ self._test_create_cluster()
+ self._test_list_cluster()
+ self._test_delete_cluster()
+ # List clusters again to ensure no cluster is shown
+ self._test_list_cluster(empty=True)
+
+ def test_create_delete_cluster_idempotency(self):
+ '''
+ Test idempotency of cluster create and delete commands.
+ '''
+ self._test_idempotency(self._test_create_cluster, ['nfs', 'cluster', 'create', self.cluster_id])
+ self._test_idempotency(self._test_delete_cluster, ['nfs', 'cluster', 'rm', self.cluster_id])
+
+ def test_create_cluster_with_invalid_cluster_id(self):
+ '''
+ Test nfs cluster deployment failure with invalid cluster id.
+ '''
+ try:
+ invalid_cluster_id = '/cluster_test' # Only [A-Za-z0-9-_.] chars are valid
+ self._nfs_cmd('cluster', 'create', invalid_cluster_id)
+ self.fail(f"Cluster successfully created with invalid cluster id {invalid_cluster_id}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.EINVAL:
+ raise
+
+ def test_create_and_delete_export(self):
+ '''
+ Test successful creation and deletion of the cephfs export.
+ '''
+ self._create_default_export()
+ self._test_get_export()
+ port, ip = self._get_port_ip_info()
+ self._test_mnt(self.pseudo_path, port, ip)
+ self._delete_export()
+ # Check if rados export object is deleted
+ self._check_export_obj_deleted()
+ self._test_mnt(self.pseudo_path, port, ip, False)
+ self._test_delete_cluster()
+
+ def test_create_delete_export_idempotency(self):
+ '''
+ Test idempotency of export create and delete commands.
+ '''
+ self._test_idempotency(self._create_default_export, [
+ 'nfs', 'export', 'create', 'cephfs',
+ '--fsname', self.fs_name, '--cluster-id', self.cluster_id,
+ '--pseudo-path', self.pseudo_path])
+ self._test_idempotency(self._delete_export, ['nfs', 'export', 'rm', self.cluster_id,
+ self.pseudo_path])
+ self._test_delete_cluster()
+
+ def test_create_multiple_exports(self):
+ '''
+ Test creating multiple exports with different access type and path.
+ '''
+        # Export-1 with default values (access type = rw and path = '/')
+ self._create_default_export()
+ # Export-2 with r only
+ self._create_export(export_id='2',
+ extra_cmd=['--pseudo-path', self.pseudo_path+'1', '--readonly'])
+ # Export-3 for subvolume with r only
+ self._cmd('fs', 'subvolume', 'create', self.fs_name, 'sub_vol')
+ fs_path = self._cmd('fs', 'subvolume', 'getpath', self.fs_name, 'sub_vol').strip()
+ self._create_export(export_id='3',
+ extra_cmd=['--pseudo-path', self.pseudo_path+'2', '--readonly',
+ '--path', fs_path])
+ # Export-4 for subvolume
+ self._create_export(export_id='4',
+ extra_cmd=['--pseudo-path', self.pseudo_path+'3',
+ '--path', fs_path])
+ # Check if exports gets listed
+ self._test_list_detailed(fs_path)
+ self._test_delete_cluster()
+ # Check if rados ganesha conf object is deleted
+ self._check_export_obj_deleted(conf_obj=True)
+ self._check_auth_ls()
+
+ def test_exports_on_mgr_restart(self):
+ '''
+ Test export availability on restarting mgr.
+ '''
+ self._create_default_export()
+ # unload and load module will restart the mgr
+ self._unload_module("cephadm")
+ self._load_module("cephadm")
+ self._orch_cmd("set", "backend", "cephadm")
+ # Check if ganesha daemon is running
+ self._check_nfs_cluster_status('running', 'Failed to redeploy NFS Ganesha cluster')
+ # Checks if created export is listed
+ self._test_list_export()
+ port, ip = self._get_port_ip_info()
+ self._test_mnt(self.pseudo_path, port, ip)
+ self._delete_export()
+ self._test_delete_cluster()
+
+ def test_export_create_with_non_existing_fsname(self):
+ '''
+ Test creating export with non-existing filesystem.
+ '''
+ try:
+ fs_name = 'nfs-test'
+ self._test_create_cluster()
+ self._nfs_cmd('export', 'create', 'cephfs',
+ '--fsname', fs_name, '--cluster-id', self.cluster_id,
+ '--pseudo-path', self.pseudo_path)
+ self.fail(f"Export created with non-existing filesystem {fs_name}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.ENOENT:
+ raise
+ finally:
+ self._test_delete_cluster()
+
+ def test_export_create_with_non_existing_clusterid(self):
+ '''
+ Test creating cephfs export with non-existing nfs cluster.
+ '''
+ try:
+ cluster_id = 'invalidtest'
+ self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name,
+ '--cluster-id', cluster_id, '--pseudo-path', self.pseudo_path)
+ self.fail(f"Export created with non-existing cluster id {cluster_id}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.ENOENT:
+ raise
+
+ def test_export_create_with_relative_pseudo_path_and_root_directory(self):
+ '''
+ Test creating cephfs export with relative or '/' pseudo path.
+ '''
+ def check_pseudo_path(pseudo_path):
+ try:
+ self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name,
+ '--cluster-id', self.cluster_id,
+ '--pseudo-path', pseudo_path)
+ self.fail(f"Export created for {pseudo_path}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.EINVAL:
+ raise
+
+ self._test_create_cluster()
+ self._cmd('fs', 'volume', 'create', self.fs_name)
+ check_pseudo_path('invalidpath')
+ check_pseudo_path('/')
+ check_pseudo_path('//')
+ self._cmd('fs', 'volume', 'rm', self.fs_name, '--yes-i-really-mean-it')
+ self._test_delete_cluster()
+
+ def test_write_to_read_only_export(self):
+ '''
+ Test write to readonly export.
+ '''
+ self._test_create_cluster()
+ self._create_export(export_id='1', create_fs=True,
+ extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly'])
+ port, ip = self._get_port_ip_info()
+ self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed')
+ self._write_to_read_only_export(self.pseudo_path, port, ip)
+ self._test_delete_cluster()
+
+ def test_cluster_info(self):
+ '''
+ Test cluster info outputs correct ip and hostname
+ '''
+ self._test_create_cluster()
+ info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id))
+ print(f'info {info_output}')
+ info_ip = info_output[self.cluster_id].get('backend', [])[0].pop("ip")
+ host_details = {
+ self.cluster_id: {
+ 'backend': [
+ {
+ "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(),
+ "port": 2049
+ }
+ ],
+ "virtual_ip": None,
+ }
+ }
+ host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split()
+ print(f'host_ip is {host_ip}, info_ip is {info_ip}')
+ self.assertDictEqual(info_output, host_details)
+ self.assertTrue(info_ip in host_ip)
+ self._test_delete_cluster()
+
+ def test_cluster_set_reset_user_config(self):
+ '''
+ Test cluster is created using user config and reverts back to default
+ config on reset.
+ '''
+ self._test_create_cluster()
+
+ pool = NFS_POOL_NAME
+ user_id = 'test'
+ fs_name = 'user_test_fs'
+ pseudo_path = '/ceph'
+ self._cmd('fs', 'volume', 'create', fs_name)
+ time.sleep(20)
+ key = self._cmd('auth', 'get-or-create-key', f'client.{user_id}', 'mon',
+ 'allow r', 'osd',
+ f'allow rw pool={pool} namespace={self.cluster_id}, allow rw tag cephfs data={fs_name}',
+ 'mds', f'allow rw path={self.path}').strip()
+ config = f""" LOG {{
+ Default_log_level = FULL_DEBUG;
+ }}
+
+ EXPORT {{
+ Export_Id = 100;
+ Transports = TCP;
+ Path = /;
+ Pseudo = {pseudo_path};
+ Protocols = 4;
+ Access_Type = RW;
+ Attr_Expiration_Time = 0;
+ Squash = None;
+ FSAL {{
+ Name = CEPH;
+ Filesystem = {fs_name};
+ User_Id = {user_id};
+ Secret_Access_Key = '{key}';
+ }}
+ }}"""
+ port, ip = self._get_port_ip_info()
+ self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', 'config',
+ 'set', self.cluster_id, '-i', '-'], stdin=config)
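+        # Give the NFS service time to pick up the user-supplied config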
+ time.sleep(30)
+ res = self._sys_cmd(['rados', '-p', pool, '-N', self.cluster_id, 'get',
+ f'userconf-nfs.{user_id}', '-'])
+ self.assertEqual(config, res.decode('utf-8'))
+ self._test_mnt(pseudo_path, port, ip)
+ self._nfs_cmd('cluster', 'config', 'reset', self.cluster_id)
+ rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls'])
+ if b'conf-nfs' not in rados_obj_ls and b'userconf-nfs' in rados_obj_ls:
+ self.fail("User config not deleted")
+ time.sleep(30)
+ self._test_mnt(pseudo_path, port, ip, False)
+ self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it')
+ self._test_delete_cluster()
+
+ def test_cluster_set_user_config_with_non_existing_clusterid(self):
+ '''
+ Test setting user config for non-existing nfs cluster.
+ '''
+ cluster_id = 'invalidtest'
+ with contextutil.safe_while(sleep=3, tries=3) as proceed:
+ while proceed():
+ try:
+ self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster',
+ 'config', 'set', cluster_id,
+ '-i', '-'], stdin='testing')
+                    self.fail("User config set for non-existing cluster "
+                              f"{cluster_id}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus == errno.ENOENT:
+ break
+ log.warning('exitstatus != ENOENT, retrying')
+
+ def test_cluster_reset_user_config_with_non_existing_clusterid(self):
+ '''
+ Test resetting user config for non-existing nfs cluster.
+ '''
+ try:
+ cluster_id = 'invalidtest'
+ self._nfs_cmd('cluster', 'config', 'reset', cluster_id)
+ self.fail(f"User config reset for non-existing cluster {cluster_id}")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.ENOENT:
+ raise
+
+ def test_create_export_via_apply(self):
+ '''
+ Test creation of export via apply
+ '''
+ self._test_create_cluster()
+ self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply',
+ self.cluster_id, '-i', '-'],
+ stdin=json.dumps({
+ "path": "/",
+ "pseudo": "/cephfs",
+ "squash": "none",
+ "access_type": "rw",
+ "protocols": [4],
+ "fsal": {
+ "name": "CEPH",
+ "fs_name": self.fs_name
+ }
+ }))
+ port, ip = self._get_port_ip_info()
+ self._test_mnt(self.pseudo_path, port, ip)
+ self._check_nfs_cluster_status(
+ 'running', 'NFS Ganesha cluster not running after new export was applied')
+ self._test_delete_cluster()
+
+ def test_update_export(self):
+ '''
+ Test update of export's pseudo path and access type from rw to ro
+ '''
+ self._create_default_export()
+ port, ip = self._get_port_ip_info()
+ self._test_mnt(self.pseudo_path, port, ip)
+ export_block = self._get_export()
+ new_pseudo_path = '/testing'
+ export_block['pseudo'] = new_pseudo_path
+ export_block['access_type'] = 'RO'
+ self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply',
+ self.cluster_id, '-i', '-'],
+ stdin=json.dumps(export_block))
+ if not self._check_nfs_cluster_event('restart'):
+ self.fail("updating export's pseudo path should trigger restart of NFS service")
+ self._check_nfs_cluster_status('running', 'NFS Ganesha cluster not running after restart')
+ self._write_to_read_only_export(new_pseudo_path, port, ip)
+ self._test_delete_cluster()
+
+ def test_update_export_ro_to_rw(self):
+ '''
+ Test update of export's access level from ro to rw
+ '''
+ self._test_create_cluster()
+ self._create_export(
+ export_id='1', create_fs=True,
+ extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly'])
+ port, ip = self._get_port_ip_info()
+ self._write_to_read_only_export(self.pseudo_path, port, ip)
+ export_block = self._get_export()
+ export_block['access_type'] = 'RW'
+ self.ctx.cluster.run(
+ args=['ceph', 'nfs', 'export', 'apply', self.cluster_id, '-i', '-'],
+ stdin=json.dumps(export_block))
+ if self._check_nfs_cluster_event('restart'):
+ self.fail("update of export's access type should not trigger NFS service restart")
+ self._test_mnt(self.pseudo_path, port, ip)
+ self._test_delete_cluster()
+
+ def test_update_export_with_invalid_values(self):
+ '''
+ Test update of export with invalid values
+ '''
+ self._create_default_export()
+ export_block = self._get_export()
+
+ def update_with_invalid_values(key, value, fsal=False):
+ export_block_new = dict(export_block)
+ if fsal:
+ export_block_new['fsal'] = dict(export_block['fsal'])
+ export_block_new['fsal'][key] = value
+ else:
+ export_block_new[key] = value
+ try:
+ self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply',
+ self.cluster_id, '-i', '-'],
+ stdin=json.dumps(export_block_new))
+ except CommandFailedError:
+ pass
+
+ update_with_invalid_values('export_id', 9)
+ update_with_invalid_values('cluster_id', 'testing_new')
+ update_with_invalid_values('pseudo', 'test_relpath')
+ update_with_invalid_values('access_type', 'W')
+ update_with_invalid_values('squash', 'no_squash')
+ update_with_invalid_values('security_label', 'invalid')
+ update_with_invalid_values('protocols', [2])
+ update_with_invalid_values('transports', ['UD'])
+ update_with_invalid_values('name', 'RGW', True)
+ update_with_invalid_values('user_id', 'testing_export', True)
+ update_with_invalid_values('fs_name', 'b', True)
+ self._test_delete_cluster()
+
+ def test_cmds_without_reqd_args(self):
+ '''
+ Test that cmd fails on not passing required arguments
+ '''
+ def exec_cmd_invalid(*cmd):
+ try:
+ self._nfs_cmd(*cmd)
+ self.fail(f"nfs {cmd} command executed successfully without required arguments")
+ except CommandFailedError as e:
+ # Command should fail for test to pass
+ if e.exitstatus != errno.EINVAL:
+ raise
+
+ exec_cmd_invalid('cluster', 'create')
+ exec_cmd_invalid('cluster', 'delete')
+ exec_cmd_invalid('cluster', 'config', 'set')
+ exec_cmd_invalid('cluster', 'config', 'reset')
+ exec_cmd_invalid('export', 'create', 'cephfs')
+ exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid')
+ exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid', 'a_fs')
+ exec_cmd_invalid('export', 'ls')
+ exec_cmd_invalid('export', 'delete')
+ exec_cmd_invalid('export', 'delete', 'clusterid')
+ exec_cmd_invalid('export', 'info')
+ exec_cmd_invalid('export', 'info', 'clusterid')
+ exec_cmd_invalid('export', 'apply')
+
+ def test_non_existent_cluster(self):
+ """
+ Test that cluster info doesn't throw junk data for non-existent cluster
+ """
+ cluster_ls = self._nfs_cmd('cluster', 'ls')
+ self.assertNotIn('foo', cluster_ls, 'cluster foo exists')
+ try:
+ self._nfs_cmd('cluster', 'info', 'foo')
+ self.fail("nfs cluster info foo returned successfully for non-existent cluster")
+ except CommandFailedError as e:
+ if e.exitstatus != errno.ENOENT:
+ raise
+
+ def test_nfs_export_with_invalid_path(self):
+ """
+ Test that nfs exports can't be created with invalid path
+ """
+ mnt_pt = '/mnt'
+ preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt])
+ self._create_cluster_with_fs(self.fs_name, mnt_pt)
+ try:
+ self._create_export(export_id='123',
+ extra_cmd=['--pseudo-path', self.pseudo_path,
+ '--path', '/non_existent_dir'])
+ except CommandFailedError as e:
+ if e.exitstatus != errno.ENOENT:
+ raise
+ self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode)
+
+ def test_nfs_export_creation_at_filepath(self):
+ """
+ Test that nfs exports can't be created at a filepath
+ """
+ mnt_pt = '/mnt'
+ preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt])
+ self._create_cluster_with_fs(self.fs_name, mnt_pt)
+ self.ctx.cluster.run(args=['touch', f'{mnt_pt}/testfile'])
+ try:
+ self._create_export(export_id='123', extra_cmd=['--pseudo-path',
+ self.pseudo_path,
+ '--path',
+ '/testfile'])
+ except CommandFailedError as e:
+ if e.exitstatus != errno.ENOTDIR:
+ raise
+ self.ctx.cluster.run(args=['rm', '-rf', '/mnt/testfile'])
+ self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode)
+
+ def test_nfs_export_creation_at_symlink(self):
+ """
+ Test that nfs exports can't be created at a symlink path
+ """
+ mnt_pt = '/mnt'
+ preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt])
+ self._create_cluster_with_fs(self.fs_name, mnt_pt)
+ self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir'])
+ self.ctx.cluster.run(args=['ln', '-s', f'{mnt_pt}/testdir',
+ f'{mnt_pt}/testdir_symlink'])
+ try:
+ self._create_export(export_id='123',
+ extra_cmd=['--pseudo-path',
+ self.pseudo_path,
+ '--path',
+ '/testdir_symlink'])
+ except CommandFailedError as e:
+ if e.exitstatus != errno.ENOTDIR:
+ raise
+ self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}/*'])
+ self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode)
diff --git a/qa/tasks/cephfs/test_openfiletable.py b/qa/tasks/cephfs/test_openfiletable.py
new file mode 100644
index 000000000..eff6b5093
--- /dev/null
+++ b/qa/tasks/cephfs/test_openfiletable.py
@@ -0,0 +1,85 @@
+import time
+import logging
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class OpenFileTable(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def _check_oft_counter(self, name, count):
+ perf_dump = self.fs.mds_asok(['perf', 'dump'])
+ if perf_dump['oft'][name] == count:
+ return True
+ return False
+
+ def test_max_items_per_obj(self):
+ """
+        The maximum number of keys per openfiles omap object is now equal to
+        the osd_deep_scrub_large_omap_object_key_threshold option.
+ """
+ self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "5")
+
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ # Write some bytes to a file
+ size_mb = 1
+
+ # Hold the file open
+ file_count = 8
+ for i in range(0, file_count):
+ filename = "open_file{}".format(i)
+ p = self.mount_a.open_background(filename)
+ self.mount_a.write_n_mb(filename, size_mb)
+
+ time.sleep(10)
+
+ """
+        With osd_deep_scrub_large_omap_object_key_threshold set to 5 and
+        8 files held open, a new rados object named mds0_openfiles.1 should
+        exist to hold the extra keys.
+ """
+
+ self.fs.radosm(["stat", "mds0_openfiles.1"])
+
+ # Now close the file
+ self.mount_a.kill_background(p)
+
+ def test_perf_counters(self):
+ """
+        Check that opening and closing a file updates the open file table
+        omap perf counters (updates, removes, kv pairs) as expected.
+ """
+
+ self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "1")
+ self.fs.mds_restart()
+ self.fs.wait_for_daemons()
+
+ perf_dump = self.fs.mds_asok(['perf', 'dump'])
+ omap_total_updates_0 = perf_dump['oft']['omap_total_updates']
+ log.info("omap_total_updates_0:{}".format(omap_total_updates_0))
+
+ # Open the file
+ p = self.mount_a.open_background("omap_counter_test_file")
+ self.wait_until_true(lambda: self._check_oft_counter('omap_total_updates', 2), timeout=120)
+
+ perf_dump = self.fs.mds_asok(['perf', 'dump'])
+ omap_total_updates_1 = perf_dump['oft']['omap_total_updates']
+ log.info("omap_total_updates_1:{}".format(omap_total_updates_1))
+
+ self.assertTrue((omap_total_updates_1 - omap_total_updates_0) == 2)
+
+ # Now close the file
+ self.mount_a.kill_background(p)
+        # Ensure that the open file table entry for the file has been removed
+ self.wait_until_true(lambda: self._check_oft_counter('omap_total_removes', 1), timeout=120)
+ self.wait_until_true(lambda: self._check_oft_counter('omap_total_kv_pairs', 1), timeout=120)
+
+ perf_dump = self.fs.mds_asok(['perf', 'dump'])
+ omap_total_removes = perf_dump['oft']['omap_total_removes']
+ omap_total_kv_pairs = perf_dump['oft']['omap_total_kv_pairs']
+ log.info("omap_total_removes:{}".format(omap_total_removes))
+ log.info("omap_total_kv_pairs:{}".format(omap_total_kv_pairs))
+ self.assertTrue(omap_total_removes == 1)
+ self.assertTrue(omap_total_kv_pairs == 1)
diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py
new file mode 100644
index 000000000..9912debed
--- /dev/null
+++ b/qa/tasks/cephfs/test_pool_perm.py
@@ -0,0 +1,109 @@
+from textwrap import dedent
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import os
+
+
+class TestPoolPerm(CephFSTestCase):
+ def test_pool_perm(self):
+ self.mount_a.run_shell(["touch", "test_file"])
+
+ file_path = os.path.join(self.mount_a.mountpoint, "test_file")
+
+ remote_script = dedent("""
+ import os
+ import errno
+
+ fd = os.open("{path}", os.O_RDWR)
+ try:
+ if {check_read}:
+ ret = os.read(fd, 1024)
+ else:
+ os.write(fd, b'content')
+ except OSError as e:
+ if e.errno != errno.EPERM:
+ raise
+ else:
+ raise RuntimeError("client does not check permission of data pool")
+ """)
+
+ client_name = "client.{0}".format(self.mount_a.client_id)
+
+ # set data pool read only
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow r pool={0}'.format(self.fs.get_data_pool_name()))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+
+ # write should fail
+ self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False)))
+
+ # set data pool write only
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow w pool={0}'.format(self.fs.get_data_pool_name()))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+
+ # read should fail
+ self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True)))
+
+ def test_forbidden_modification(self):
+ """
+ That a client who does not have the capability for setting
+ layout pools is prevented from doing so.
+ """
+
+ # Set up
+ client_name = "client.{0}".format(self.mount_a.client_id)
+ new_pool_name = "data_new"
+ self.fs.add_data_pool(new_pool_name)
+
+ self.mount_a.run_shell(["touch", "layoutfile"])
+ self.mount_a.run_shell(["mkdir", "layoutdir"])
+
+ # Set MDS 'rw' perms: missing 'p' means no setting pool layouts
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r',
+ 'osd',
+ 'allow rw pool={0},allow rw pool={1}'.format(
+ self.fs.get_data_pool_names()[0],
+ self.fs.get_data_pool_names()[1],
+ ))
+
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
+ new_pool_name)
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
+ new_pool_name)
+ self.mount_a.umount_wait()
+
+ # Set MDS 'rwp' perms: should now be able to set layouts
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r',
+ 'osd',
+ 'allow rw pool={0},allow rw pool={1}'.format(
+ self.fs.get_data_pool_names()[0],
+ self.fs.get_data_pool_names()[1],
+ ))
+ self.mount_a.mount_wait()
+ self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool",
+ new_pool_name)
+ self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool",
+ new_pool_name)
+ self.mount_a.umount_wait()
+
+ def tearDown(self):
+ self.fs.mon_manager.raw_cluster_cmd_result(
+ 'auth', 'caps', "client.{0}".format(self.mount_a.client_id),
+ 'mds', 'allow', 'mon', 'allow r', 'osd',
+ 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0]))
+ super(TestPoolPerm, self).tearDown()
+
diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py
new file mode 100644
index 000000000..0386672bd
--- /dev/null
+++ b/qa/tasks/cephfs/test_quota.py
@@ -0,0 +1,106 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+from teuthology.exceptions import CommandFailedError
+
+class TestQuota(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 1
+
+ def test_remote_update_getfattr(self):
+ """
+ That quota changes made from one client are visible to another
+ client looking at ceph.quota xattrs
+ """
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ self.assertEqual(
+ self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
+ None)
+ self.assertEqual(
+ self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
+ None)
+
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10")
+ self.assertEqual(
+ self.mount_a.getfattr("./subdir", "ceph.quota.max_files"),
+ "10")
+
+ # Should be visible as soon as setxattr operation completes on
+ # mds (we get here sooner because setfattr gets an early reply)
+ self.wait_until_equal(
+ lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"),
+ "10", timeout=10)
+
+ def test_remote_update_df(self):
+ """
+ That when a client modifies the quota on a directory used
+ as another client's root, the other client sees the change
+ reflected in their statfs output.
+ """
+
+ self.mount_b.umount_wait()
+
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ size_before = 1024 * 1024 * 128
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ "%s" % size_before)
+
+ self.mount_b.mount_wait(cephfs_mntpt="/subdir")
+
+ self.assertDictEqual(
+ self.mount_b.df(),
+ {
+ "total": size_before,
+ "used": 0,
+ "available": size_before
+ })
+
+ size_after = 1024 * 1024 * 256
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ "%s" % size_after)
+
+ # Should be visible as soon as setxattr operation completes on
+ # mds (we get here sooner because setfattr gets an early reply)
+ self.wait_until_equal(
+ lambda: self.mount_b.df(),
+ {
+ "total": size_after,
+ "used": 0,
+ "available": size_after
+ },
+ timeout=10
+ )
+
+ def test_remote_update_write(self):
+ """
+ That when a client modifies the quota on a directory used
+ as another client's root, the other client sees the effect
+ of the change when writing data.
+ """
+
+ self.mount_a.run_shell(["mkdir", "subdir_files"])
+ self.mount_a.run_shell(["mkdir", "subdir_data"])
+
+ # Set some nice high quotas that mount_b's initial operations
+ # will be well within
+ self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100")
+ self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600")
+
+ # Do some writes within my quota
+ self.mount_b.create_n_files("subdir_files/file", 20)
+ self.mount_b.write_n_mb("subdir_data/file", 20)
+
+ # Set quotas lower than what mount_b already wrote, it should
+ # refuse to write more once it's seen them
+ self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10")
+ self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576")
+
+ # Do some writes that would have been okay within the old quota,
+ # but are forbidden under the new quota
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.create_n_files("subdir_files/file", 40)
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.write_n_mb("subdir_data/file", 40)
+
diff --git a/qa/tasks/cephfs/test_readahead.py b/qa/tasks/cephfs/test_readahead.py
new file mode 100644
index 000000000..7e6270f03
--- /dev/null
+++ b/qa/tasks/cephfs/test_readahead.py
@@ -0,0 +1,26 @@
+import logging
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestReadahead(CephFSTestCase):
+ def test_flush(self):
+ # Create 32MB file
+ self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"])
+
+ # Unmount and remount the client to flush cache
+ self.mount_a.umount_wait()
+ self.mount_a.mount_wait()
+
+ initial_op_read = self.mount_a.get_op_read_count()
+ self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"])
+ op_read = self.mount_a.get_op_read_count()
+ self.assertGreaterEqual(op_read, initial_op_read)
+ op_read -= initial_op_read
+ log.info("read operations: {0}".format(op_read))
+
+ # with exponentially increasing readahead, we should see fewer than 10 operations
+        # but this test simply checks that the client is not doing a remote read for each local read
+ if op_read >= 32:
+ raise RuntimeError("readahead not working")
diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py
new file mode 100644
index 000000000..bbcdf9769
--- /dev/null
+++ b/qa/tasks/cephfs/test_recovery_fs.py
@@ -0,0 +1,38 @@
+import logging
+from os.path import join as os_path_join
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestFSRecovery(CephFSTestCase):
+ """
+ Tests for recovering FS after loss of FSMap
+ """
+
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 3
+
+ def test_recover_fs_after_fsmap_removal(self):
+ data_pool = self.fs.get_data_pool_name()
+ metadata_pool = self.fs.get_metadata_pool_name()
+ # write data in mount, and fsync
+ self.mount_a.create_n_files('file_on_fs', 1, sync=True)
+        # fail the MDSs to allow removing the file system in the next step
+ self.fs.fail()
+ # Remove file system to lose FSMap and keep the pools intact.
+ # This mimics the scenario where the monitor store is rebuilt
+ # using OSDs to recover a cluster with corrupt monitor store.
+ # The FSMap is permanently lost, but the FS pools are
+ # recovered/intact
+ self.fs.rm()
+ # Recreate file system with pool and previous fscid
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'fs', 'new', self.fs.name, metadata_pool, data_pool,
+ '--recover', '--force', '--fscid', f'{self.fs.id}')
+ self.fs.set_joinable()
+ # Check status of file system
+ self.fs.wait_for_daemons()
+        # check that data in the file system is intact
+ filepath = os_path_join(self.mount_a.hostfs_mntpt, 'file_on_fs_0')
+ self.assertEqual(self.mount_a.read_file(filepath), "0")
diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py
new file mode 100644
index 000000000..8c4e1967d
--- /dev/null
+++ b/qa/tasks/cephfs/test_recovery_pool.py
@@ -0,0 +1,179 @@
+"""
+Test our tools for recovering metadata from the data pool into an alternate pool
+"""
+
+import logging
+import traceback
+from collections import namedtuple
+
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class OverlayWorkload(object):
+ def __init__(self):
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def assert_equal(self, a, b):
+ try:
+ if a != b:
+ raise AssertionError("{0} != {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self, fs):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+
+ pool = fs.get_metadata_pool_name()
+ fs.rados(["purge", pool, '--yes-i-really-really-mean-it'])
+
+ def flush(self, fs):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ fs.rank_asok(["flush", "journal"])
+
+
+class SimpleOverlayWorkload(OverlayWorkload):
+ """
+ Single file, single directory, check that it gets recovered and so does its size
+ """
+ def write(self, mount):
+ mount.run_shell(["mkdir", "subdir"])
+ mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = mount.stat("subdir/sixmegs")
+
+ def validate(self, recovery_mount):
+ recovery_mount.run_shell(["ls", "subdir"])
+ st = recovery_mount.stat("subdir/sixmegs")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+class TestRecoveryPool(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 1
+ REQUIRE_RECOVERY_FILESYSTEM = True
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ def _rebuild_metadata(self, workload, other_pool=None, workers=1):
+ """
+ That when all objects in metadata pool are removed, we can rebuild a metadata pool
+ based on the contents of a data pool, and a client can see and read our files.
+ """
+
+ # First, inject some files
+
+ workload.write(self.mount_a)
+
+ # Unmount the client and flush the journal: the tool should also cope with
+ # situations where there is dirty metadata, but we'll test that separately
+ self.mount_a.umount_wait()
+ workload.flush(self.fs)
+ self.fs.fail()
+
+ # After recovery, we need the MDS to not be strict about stats (in production these options
+ # are off by default, but in QA we need to explicitly disable them)
+ # Note: these have to be written to ceph.conf to override existing ceph.conf values.
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+ self.fs.mds_restart()
+
+ # Apply any data damage the workload wants
+ workload.damage(self.fs)
+
+ # Create the alternate pool if requested
+ recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False)
+ recovery_fs.set_data_pool_name(self.fs.get_data_pool_name())
+ recovery_fs.create(recover=True, metadata_overlay=True)
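+        # The recovery filesystem shares the original data pool but gets a
+        # fresh metadata pool as the overlay.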
+
+ recovery_pool = recovery_fs.get_metadata_pool_name()
+ recovery_fs.mon_manager.raw_cluster_cmd('-s')
+
+ # Reset the MDS map in case multiple ranks were in play: recovery procedure
+ # only understands how to rebuild metadata under rank 0
+ #self.fs.reset()
+ #self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
+ #self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
+ #self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
+
+ # Run the recovery procedure
+ recovery_fs.data_scan(['init', '--force-init',
+ '--filesystem', recovery_fs.name,
+ '--alternate-pool', recovery_pool])
+ recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "session"])
+ recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "snap"])
+ recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "inode"])
+ if False:
+ with self.assertRaises(CommandFailedError):
+ # Normal reset should fail when no objects are present, we'll use --force instead
+ self.fs.journal_tool(["journal", "reset"], 0)
+
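+        # scan_extents and scan_inodes rebuild inode metadata from the data
+        # pool into the alternate metadata pool; scan_links then repairs
+        # dentry linkage.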
+ recovery_fs.data_scan(['scan_extents', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ self.fs.get_data_pool_name()])
+ recovery_fs.data_scan(['scan_inodes', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ '--force-corrupt', '--force-init',
+ self.fs.get_data_pool_name()])
+ recovery_fs.data_scan(['scan_links', '--filesystem', recovery_fs.name])
+ recovery_fs.journal_tool(['event', 'recover_dentries', 'list',
+ '--alternate-pool', recovery_pool], 0)
+ recovery_fs.journal_tool(["journal", "reset", "--force"], 0)
+
+ # Start the MDS
+ recovery_fs.set_joinable()
+ status = recovery_fs.wait_for_daemons()
+
+ self.config_set('mds', 'debug_mds', '20')
+ for rank in recovery_fs.get_ranks(status=status):
+ recovery_fs.rank_tell(['scrub', 'start', '/', 'force,recursive,repair'], rank=rank['rank'], status=status)
+ log.info(str(recovery_fs.status()))
+
+ # Mount a client
+ self.mount_a.mount_wait(cephfs_name=recovery_fs.name)
+
+ # See that the files are present and correct
+ errors = workload.validate(self.mount_a)
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def test_rebuild_simple(self):
+ self._rebuild_metadata(SimpleOverlayWorkload())
diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py
new file mode 100644
index 000000000..647860129
--- /dev/null
+++ b/qa/tasks/cephfs/test_scrub.py
@@ -0,0 +1,187 @@
+"""
+Test CephFS scrub (distinct from OSD scrub) functionality
+"""
+
+from io import BytesIO
+import logging
+from collections import namedtuple
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class Workload(CephFSTestCase):
+ def __init__(self, test, filesystem, mount):
+ super().__init__()
+ self._test = test
+ self._mount = mount
+ self._filesystem = filesystem
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+ # Delete every object in the metadata pool
+ pool = self._filesystem.get_metadata_pool_name()
+ self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._filesystem.mds_asok(["flush", "journal"])
+
+
+class BacktraceWorkload(Workload):
+ """
+ Single file, single directory, wipe the backtrace and check it.
+ """
+ def write(self):
+ self._mount.run_shell(["mkdir", "subdir"])
+ self._mount.write_n_mb("subdir/sixmegs", 6)
+
+ def validate(self):
+ st = self._mount.stat("subdir/sixmegs")
+ self._filesystem.mds_asok(["flush", "journal"])
+ bt = self._filesystem.read_backtrace(st['st_ino'])
+ parent = bt['ancestors'][0]['dname']
+ self.assertEqual(parent, 'sixmegs')
+ return self._errors
+
+ def damage(self):
+ st = self._mount.stat("subdir/sixmegs")
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._filesystem._write_data_xattr(st['st_ino'], "parent", "")
+
+ def create_files(self, nfiles=1000):
+ self._mount.create_n_files("scrub-new-files/file", nfiles)
+
+
+class DupInodeWorkload(Workload):
+ """
+ Duplicate an inode and try scrubbing it twice.
+ """
+
+ def write(self):
+ self._mount.run_shell(["mkdir", "parent"])
+ self._mount.run_shell(["mkdir", "parent/child"])
+ self._mount.write_n_mb("parent/parentfile", 6)
+ self._mount.write_n_mb("parent/child/childfile", 6)
+
+ def damage(self):
+ self._mount.umount_wait()
+ self._filesystem.mds_asok(["flush", "journal"])
+ self._filesystem.fail()
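+ # 10000000000.00000000 is the dirfrag object of the first directory created
+ # above ("parent"); copying the omap value of its "parentfile_head" dentry
+ # under a new "shadow_head" key forges a second dentry referencing the same
+ # inode, which is the duplicate that scrub has to detect and repair.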
+ d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"])
+ self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d))
+ self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True)
+ self._filesystem.set_joinable()
+ self._filesystem.wait_for_daemons()
+
+ def validate(self):
+ out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+ self.assertTrue(self._filesystem.are_daemons_healthy())
+ return self._errors
+
+
+class TestScrub(CephFSTestCase):
+ MDSS_REQUIRED = 1
+
+ def setUp(self):
+ super().setUp()
+
+ def _scrub(self, workload, workers=1):
+ """
+ Run the workload, let it damage the filesystem, scrub with repair enabled,
+ and verify that the workload still validates cleanly afterwards.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+ # The MDS should not be strict about stats during this test (in production these
+ # options are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ out_json = self.fs.run_scrub(["start", "/", "recursive,repair"])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def _get_damage_count(self, damage_type='backtrace'):
+ out_json = self.fs.rank_tell(["damage", "ls"])
+ self.assertNotEqual(out_json, None)
+
+ damage_count = 0
+ for it in out_json:
+ if it['damage_type'] == damage_type:
+ damage_count += 1
+ return damage_count
+
+ def _scrub_new_files(self, workload):
+ """
+ That scrubbing new files does not lead to errors
+ """
+ workload.create_files(1000)
+ self.fs.wait_until_scrub_complete()
+ self.assertEqual(self._get_damage_count(), 0)
+
+ def test_scrub_backtrace_for_new_files(self):
+ self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a))
+
+ def test_scrub_backtrace(self):
+ self._scrub(BacktraceWorkload(self, self.fs, self.mount_a))
+
+ def test_scrub_dup_inode(self):
+ self._scrub(DupInodeWorkload(self, self.fs, self.mount_a))
+
+ def test_mdsdir_scrub_backtrace(self):
+ damage_count = self._get_damage_count()
+ self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks'])
+
+ out_json = self.fs.run_scrub(["start", "~mdsdir", "recursive"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+ self.assertEqual(self._get_damage_count(), damage_count)
+ self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks'])
diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py
new file mode 100644
index 000000000..e41b997a6
--- /dev/null
+++ b/qa/tasks/cephfs/test_scrub_checks.py
@@ -0,0 +1,462 @@
+"""
+MDS admin socket scrubbing-related tests.
+"""
+import json
+import logging
+import errno
+import time
+from teuthology.exceptions import CommandFailedError
+from teuthology.contextutil import safe_while
+import os
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+class TestScrubControls(CephFSTestCase):
+ """
+ Test basic scrub control operations such as abort, pause and resume.
+ """
+
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 1
+
+ def _abort_scrub(self, expected):
+ res = self.fs.run_scrub(["abort"])
+ self.assertEqual(res['return_code'], expected)
+ def _pause_scrub(self, expected):
+ res = self.fs.run_scrub(["pause"])
+ self.assertEqual(res['return_code'], expected)
+ def _resume_scrub(self, expected):
+ res = self.fs.run_scrub(["resume"])
+ self.assertEqual(res['return_code'], expected)
+ def _check_task_status(self, expected_status, timo=120):
+ """ check scrub status for current active mds in ceph status """
+ with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
+ while proceed():
+ active = self.fs.get_active_names()
+ log.debug("current active={0}".format(active))
+ task_status = self.fs.get_task_status("scrub status")
+ try:
+ if task_status[active[0]].startswith(expected_status):
+ return True
+ except KeyError:
+ pass
+
+ def _check_task_status_na(self, timo=120):
+ """ check absence of scrub status in ceph status """
+ with safe_while(sleep=1, tries=timo, action='wait for task status') as proceed:
+ while proceed():
+ active = self.fs.get_active_names()
+ log.debug("current active={0}".format(active))
+ task_status = self.fs.get_task_status("scrub status")
+ if active[0] not in task_status:
+ return True
+
+ def create_scrub_data(self, test_dir):
+ for i in range(32):
+ dirname = "dir.{0}".format(i)
+ dirpath = os.path.join(test_dir, dirname)
+ self.mount_a.run_shell_payload(f"""
+set -e
+mkdir -p {dirpath}
+for ((i = 0; i < 32; i++)); do
+ dd if=/dev/urandom of={dirpath}/filename.$i bs=1M conv=fdatasync count=1
+done
+""")
+
+ def test_scrub_abort(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ self.create_scrub_data(test_dir)
+
+ out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # abort and verify
+ self._abort_scrub(0)
+ self.fs.wait_until_scrub_complete(sleep=5, timeout=30)
+
+ # sleep enough to fetch updated task status
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
+
+ def test_scrub_pause_and_resume(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ self.create_scrub_data(test_dir)
+
+ out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # pause and verify
+ self._pause_scrub(0)
+ out_json = self.fs.get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # resume and verify
+ self._resume_scrub(0)
+ out_json = self.fs.get_scrub_status()
+ self.assertFalse("PAUSED" in out_json['status'])
+
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
+
+ def test_scrub_pause_and_resume_with_abort(self):
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ self.create_scrub_data(test_dir)
+
+ out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # pause and verify
+ self._pause_scrub(0)
+ out_json = self.fs.get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # abort and verify
+ self._abort_scrub(0)
+ out_json = self.fs.get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+ self.assertTrue("0 inodes" in out_json['status'])
+
+ # scrub status should still be paused...
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # resume and verify
+ self._resume_scrub(0)
+ self.assertTrue(self.fs.wait_until_scrub_complete(sleep=5, timeout=30))
+
+ checked = self._check_task_status_na()
+ self.assertTrue(checked)
+
+ def test_scrub_task_status_on_mds_failover(self):
+ (original_active, ) = self.fs.get_active_names()
+ original_standbys = self.mds_cluster.get_standby_daemons()
+
+ test_dir = "scrub_control_test_path"
+ abs_test_path = "/{0}".format(test_dir)
+
+ self.create_scrub_data(test_dir)
+
+ out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"])
+ self.assertNotEqual(out_json, None)
+
+ # pause and verify
+ self._pause_scrub(0)
+ out_json = self.fs.get_scrub_status()
+ self.assertTrue("PAUSED" in out_json['status'])
+
+ checked = self._check_task_status("paused")
+ self.assertTrue(checked)
+
+ # Kill the rank 0
+ self.fs.mds_stop(original_active)
+
+ def promoted():
+ active = self.fs.get_active_names()
+ return active and active[0] in original_standbys
+
+ log.info("Waiting for promotion of one of the original standbys {0}".format(
+ original_standbys))
+ self.wait_until_true(promoted, timeout=self.fs.beacon_timeout)
+
+ self._check_task_status_na()
+
+class TestScrubChecks(CephFSTestCase):
+ """
+ Run flush and scrub commands on the specified files in the filesystem. This
+ task will run through a sequence of operations, but it is not comprehensive
+ on its own -- it doesn't manipulate the mds cache state to test on both
+ in- and out-of-memory parts of the hierarchy. So it's designed to be run
+ multiple times within a single test run, so that the test can manipulate
+ memory state.
+
+ Usage:
+ mds_scrub_checks:
+ mds_rank: 0
+ path: path/to/test/dir
+ client: 0
+ run_seq: [0-9]+
+
+ Increment the run_seq on subsequent invocations within a single test run;
+ it uses that value to generate unique folder and file names.
+ """
+
+ MDSS_REQUIRED = 1
+ CLIENTS_REQUIRED = 1
+
+ def test_scrub_checks(self):
+ self._checks(0)
+ self._checks(1)
+
+ def _checks(self, run_seq):
+ mds_rank = 0
+ test_dir = "scrub_test_path"
+
+ abs_test_path = "/{0}".format(test_dir)
+
+ log.info("mountpoint: {0}".format(self.mount_a.mountpoint))
+ client_path = os.path.join(self.mount_a.mountpoint, test_dir)
+ log.info("client_path: {0}".format(client_path))
+
+ log.info("Cloning repo into place")
+ repo_path = TestScrubChecks.clone_repo(self.mount_a, client_path)
+
+ log.info("Initiating mds_scrub_checks on mds.{id_} test_path {path}, run_seq {seq}".format(
+ id_=mds_rank, path=abs_test_path, seq=run_seq)
+ )
+
+
+ success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0)
+
+ nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path)
+ self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep),
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
+ self.tell_command(mds_rank, "scrub start {nep}".format(nep=nep),
+ lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT))
+
+ test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path)
+ dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)
+
+ if run_seq == 0:
+ log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
+ command = "flush_path {dirpath}".format(dirpath=dirpath)
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start {dirpath}".format(dirpath=dirpath)
+ self.tell_command(mds_rank, command, success_validator)
+
+ filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
+ repo_path=test_repo_path)
+ if run_seq == 0:
+ log.info("First run: flushing {filepath}".format(filepath=filepath))
+ command = "flush_path {filepath}".format(filepath=filepath)
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start {filepath}".format(filepath=filepath)
+ self.tell_command(mds_rank, command, success_validator)
+
+ if run_seq == 0:
+ log.info("First run: flushing base dir /")
+ command = "flush_path /"
+ self.asok_command(mds_rank, command, success_validator)
+ command = "scrub start /"
+ self.tell_command(mds_rank, command, success_validator)
+
+ new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
+ test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
+ i=run_seq)
+ self.mount_a.run_shell(["mkdir", new_dir])
+ command = "flush_path {dir}".format(dir=test_new_dir)
+ self.asok_command(mds_rank, command, success_validator)
+
+ new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
+ i=run_seq)
+ test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
+ i=run_seq)
+ self.mount_a.write_n_mb(new_file, 1)
+
+ command = "flush_path {file}".format(file=test_new_file)
+ self.asok_command(mds_rank, command, success_validator)
+
+ # check that scrub fails on errors
+ ino = self.mount_a.path_to_ino(new_file)
+ rados_obj_name = "{ino:x}.00000000".format(ino=ino)
+ command = "scrub start {file}".format(file=test_new_file)
+
+ def _check_and_clear_damage(ino, dtype):
+ all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank)
+ damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype]
+ for d in damage:
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]),
+ "damage", "rm", str(d['id']))
+ return len(damage) > 0
+
+ # Missing parent xattr
+ self.assertFalse(_check_and_clear_damage(ino, "backtrace"));
+ self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name())
+ self.tell_command(mds_rank, command, success_validator)
+ self.fs.wait_until_scrub_complete(sleep=5, timeout=30)
+ self.assertTrue(_check_and_clear_damage(ino, "backtrace"));
+
+ command = "flush_path /"
+ self.asok_command(mds_rank, command, success_validator)
+
+ def scrub_with_stray_evaluation(self, fs, mnt, path, flag, files=2000,
+ _hard_links=3):
+ fs.set_allow_new_snaps(True)
+
+ test_dir = "stray_eval_dir"
+ mnt.run_shell(["mkdir", test_dir])
+ client_path = os.path.join(mnt.mountpoint, test_dir)
+ mnt.create_n_files(fs_path=f"{test_dir}/file", count=files,
+ hard_links=_hard_links)
+ mnt.run_shell(["mkdir", f"{client_path}/.snap/snap1-{test_dir}"])
+ mnt.run_shell(f"find {client_path}/ -type f -delete")
+ mnt.run_shell(["rmdir", f"{client_path}/.snap/snap1-{test_dir}"])
+ perf_dump = fs.rank_tell(["perf", "dump"], 0)
+ self.assertNotEqual(perf_dump.get('mds_cache').get('num_strays'),
+ 0, "mdcache.num_strays is zero")
+
+ log.info(
+ f"num of strays: {perf_dump.get('mds_cache').get('num_strays')}")
+
+ out_json = fs.run_scrub(["start", path, flag])
+ self.assertNotEqual(out_json, None)
+ self.assertEqual(out_json["return_code"], 0)
+
+ self.assertEqual(
+ fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ perf_dump = fs.rank_tell(["perf", "dump"], 0)
+ self.assertEqual(int(perf_dump.get('mds_cache').get('num_strays')),
+ 0, "mdcache.num_strays is non-zero")
+
+ def test_scrub_repair(self):
+ mds_rank = 0
+ test_dir = "scrub_repair_path"
+
+ self.mount_a.run_shell(["mkdir", test_dir])
+ self.mount_a.run_shell(["touch", "{0}/file".format(test_dir)])
+ dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir))
+
+ self.mount_a.umount_wait()
+
+ # flush journal entries to dirfrag objects, and expire journal
+ self.fs.mds_asok(['flush', 'journal'])
+ self.fs.mds_stop()
+
+ # remove the dentry from dirfrag, cause incorrect fragstat/rstat
+ self.fs.radosm(["rmomapkey", dir_objname, "file_head"])
+
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+
+ # fragstat indicates the directory is not empty, rmdir should fail
+ with self.assertRaises(CommandFailedError) as ar:
+ self.mount_a.run_shell(["rmdir", test_dir])
+ self.assertEqual(ar.exception.exitstatus, 1)
+
+ self.tell_command(mds_rank, "scrub start /{0} repair".format(test_dir),
+ lambda j, r: self.json_validator(j, r, "return_code", 0))
+
+ # wait a few second for background repair
+ time.sleep(10)
+
+ # fragstat should be fixed
+ self.mount_a.run_shell(["rmdir", test_dir])
+
+ def test_stray_evaluation_with_scrub(self):
+ """
+ test that scrub can iterate over ~mdsdir and evaluate strays
+ """
+ self.scrub_with_stray_evaluation(self.fs, self.mount_a, "~mdsdir",
+ "recursive")
+
+ def test_flag_scrub_mdsdir(self):
+ """
+ test flag scrub_mdsdir
+ """
+ self.scrub_with_stray_evaluation(self.fs, self.mount_a, "/",
+ "recursive,scrub_mdsdir")
+
+ @staticmethod
+ def json_validator(json_out, rc, element, expected_value):
+ if rc != 0:
+ return False, "asok command returned error {rc}".format(rc=rc)
+ element_value = json_out.get(element)
+ if element_value != expected_value:
+ return False, "unexpectedly got {jv} instead of {ev}!".format(
+ jv=element_value, ev=expected_value)
+ return True, "Succeeded"
+
+ def tell_command(self, mds_rank, command, validator):
+ log.info("Running command '{command}'".format(command=command))
+
+ command_list = command.split()
+ jout = self.fs.rank_tell(command_list, mds_rank)
+
+ log.info("command '{command}' returned '{jout}'".format(
+ command=command, jout=jout))
+
+ success, errstring = validator(jout, 0)
+ if not success:
+ raise AsokCommandFailedError(command, 0, jout, errstring)
+ return jout
+
+ def asok_command(self, mds_rank, command, validator):
+ log.info("Running command '{command}'".format(command=command))
+
+ command_list = command.split()
+
+ # we just assume there's an active mds for every rank
+ mds_id = self.fs.get_active_names()[mds_rank]
+ proc = self.fs.mon_manager.admin_socket('mds', mds_id,
+ command_list, check_status=False)
+ rout = proc.exitstatus
+ sout = proc.stdout.getvalue()
+
+ if sout.strip():
+ jout = json.loads(sout)
+ else:
+ jout = None
+
+ log.info("command '{command}' got response code '{rout}' and stdout '{sout}'".format(
+ command=command, rout=rout, sout=sout))
+
+ success, errstring = validator(jout, rout)
+
+ if not success:
+ raise AsokCommandFailedError(command, rout, jout, errstring)
+
+ return jout
+
+ @staticmethod
+ def clone_repo(client_mount, path):
+ repo = "ceph-qa-suite"
+ repo_path = os.path.join(path, repo)
+ client_mount.run_shell(["mkdir", "-p", path])
+
+ try:
+ client_mount.stat(repo_path)
+ except CommandFailedError:
+ client_mount.run_shell([
+ "git", "clone", '--branch', 'giant',
+ "http://github.com/ceph/{repo}".format(repo=repo),
+ "{path}/{repo}".format(path=path, repo=repo)
+ ])
+
+ return repo_path
+
+
+class AsokCommandFailedError(Exception):
+ """
+ Exception thrown when we get an unexpected response
+ on an admin socket command
+ """
+
+ def __init__(self, command, rc, json_out, errstring):
+ self.command = command
+ self.rc = rc
+ self.json = json_out
+ self.errstring = errstring
+
+ def __str__(self):
+ return "Admin socket: {command} failed with rc={rc} json output={json}, because '{es}'".format(
+ command=self.command, rc=self.rc, json=self.json, es=self.errstring)
diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py
new file mode 100644
index 000000000..ad6fd1d60
--- /dev/null
+++ b/qa/tasks/cephfs/test_sessionmap.py
@@ -0,0 +1,232 @@
+import time
+import json
+import logging
+
+from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+class TestSessionMap(CephFSTestCase):
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 2
+
+ def test_tell_session_drop(self):
+ """
+ That when a `tell` command is sent using the python CLI,
+ its MDS session is gone after it terminates
+ """
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ self.fs.rank_tell(["session", "ls"], status=status)
+
+ ls_data = self.fs.rank_asok(['session', 'ls'], status=status)
+ self.assertEqual(len(ls_data), 0)
+
+ def _get_connection_count(self, status=None):
+ perf = self.fs.rank_asok(["perf", "dump"], status=status)
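+ # the perf dump contains one "AsyncMessenger::Worker-N" section per
+ # messenger worker thread; summing msgr_active_connections across them
+ # gives the MDS-wide count of open connections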
+ conn = 0
+ for module, dump in perf.items():
+ if "AsyncMessenger::Worker" in module:
+ conn += dump['msgr_active_connections']
+ return conn
+
+ def test_tell_conn_close(self):
+ """
+ That when a `tell` command is sent using the python CLI,
+ the conn count goes back to where it started (i.e. we aren't
+ leaving connections open)
+ """
+ self.config_set('mds', 'ms_async_reap_threshold', '1')
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ s = self._get_connection_count(status=status)
+ self.fs.rank_tell(["session", "ls"], status=status)
+ self.wait_until_true(
+ lambda: self._get_connection_count(status=status) == s,
+ timeout=30
+ )
+
+ def test_mount_conn_close(self):
+ """
+ That when a client unmounts, the thread count on the MDS goes back
+ to what it was before the client mounted
+ """
+ self.config_set('mds', 'ms_async_reap_threshold', '1')
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ status = self.fs.status()
+ s = self._get_connection_count(status=status)
+ self.mount_a.mount_wait()
+ self.assertGreater(self._get_connection_count(status=status), s)
+ self.mount_a.umount_wait()
+ self.wait_until_true(
+ lambda: self._get_connection_count(status=status) == s,
+ timeout=30
+ )
+
+ def test_version_splitting(self):
+ """
+ That when many sessions are updated, they are correctly
+ split into multiple versions to obey mds_sessionmap_keys_per_op
+ """
+
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # Configure MDS to write one OMAP key at once
+ self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1)
+ self.fs.mds_fail_restart()
+ status = self.fs.wait_for_daemons()
+
+ # Bring the clients back
+ self.mount_a.mount_wait()
+ self.mount_b.mount_wait()
+
+ # See that they've got sessions
+ self.assert_session_count(2, mds_id=self.fs.get_rank(status=status)['name'])
+
+ # See that we persist their sessions
+ self.fs.rank_asok(["flush", "journal"], rank=0, status=status)
+ table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
+ log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
+ self.assertEqual(table_json['0']['result'], 0)
+ self.assertEqual(len(table_json['0']['data']['sessions']), 2)
+
+ # Now, induce a "force_open_sessions" event by exporting a dir
+ self.mount_a.run_shell(["mkdir", "bravo"])
+ self.mount_a.run_shell(["touch", "bravo/file_a"])
+ self.mount_b.run_shell(["touch", "bravo/file_b"])
+
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ def get_omap_wrs():
+ return self.fs.rank_asok(['perf', 'dump', 'objecter'], rank=1, status=status)['objecter']['omap_wr']
+
+ # Flush so that there are no dirty sessions on rank 1
+ self.fs.rank_asok(["flush", "journal"], rank=1, status=status)
+
+ # Export so that we get a force_open to rank 1 for the two sessions from rank 0
+ initial_omap_wrs = get_omap_wrs()
+ self.fs.rank_asok(['export', 'dir', '/bravo', '1'], rank=0, status=status)
+
+ # This is the critical (if rather subtle) check: that in the process of doing an export dir,
+ # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There
+ # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see
+ # a single session get written out (the first of the two, triggered by the second getting marked
+ # dirty)
+ # The number of writes is two per session, because the header (sessionmap version) update and
+ # KV write both count. Also, multiply by 2 for each openfile table update.
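+ # (here that should work out to 2*2 = 4 omap writes: two for the single
+ # flushed session plus two for the corresponding openfile table update)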
+ self.wait_until_true(
+ lambda: get_omap_wrs() - initial_omap_wrs == 2*2,
+ timeout=30 # Long enough for an export to get acked
+ )
+
+ # Now end our sessions and check the backing sessionmap is updated correctly
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+
+ # In-memory sessionmap check
+ self.assert_session_count(0, mds_id=self.fs.get_rank(status=status)['name'])
+
+ # On-disk sessionmap check
+ self.fs.rank_asok(["flush", "journal"], rank=0, status=status)
+ table_json = json.loads(self.fs.table_tool(["0", "show", "session"]))
+ log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2)))
+ self.assertEqual(table_json['0']['result'], 0)
+ self.assertEqual(len(table_json['0']['data']['sessions']), 0)
+
+ def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None):
+ """
+ Set up auth credentials for a client mount, and write out the keyring
+ for the client to use.
+ """
+
+ if osd_caps is None:
+ osd_caps = "allow rw"
+
+ if mon_caps is None:
+ mon_caps = "allow r"
+
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.{name}".format(name=id_name),
+ "mds", mds_caps,
+ "osd", osd_caps,
+ "mon", mon_caps
+ )
+ mount.client_id = id_name
+ mount.client_remote.write_file(mount.get_keyring_path(), out, sudo=True)
+ self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path())
+
+ def test_session_reject(self):
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Requires FUSE client to inject client metadata")
+
+ self.mount_a.run_shell(["mkdir", "foo"])
+ self.mount_a.run_shell(["mkdir", "foo/bar"])
+ self.mount_a.umount_wait()
+
+ # Mount B will be my rejected client
+ self.mount_b.umount_wait()
+
+ # Configure a client that is limited to /foo/bar
+ self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar")
+ # Check he can mount that dir and do IO
+ self.mount_b.mount_wait(cephfs_mntpt="/foo/bar")
+ self.mount_b.create_destroy()
+ self.mount_b.umount_wait()
+
+ # Configure the client to claim that its mount point metadata is /baz
+ self.set_conf("client.badguy", "client_metadata", "root=/baz")
+ # Try to mount the client, see that it fails
+ with self.assert_cluster_log("client session with non-allowable root '/baz' denied"):
+ with self.assertRaises(CommandFailedError):
+ self.mount_b.mount_wait(cephfs_mntpt="/foo/bar")
+
+ def test_session_evict_blocklisted(self):
+ """
+ Check that mds evicts blocklisted client
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Requires FUSE client to use "
+ "mds_cluster.is_addr_blocklisted()")
+
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload("mkdir {d0,d1} && touch {d0,d1}/file")
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self._wait_subtrees([('/d0', 0), ('/d1', 1)], status=status)
+
+ self.mount_a.run_shell(["touch", "d0/f0"])
+ self.mount_a.run_shell(["touch", "d1/f0"])
+ self.mount_b.run_shell(["touch", "d0/f1"])
+ self.mount_b.run_shell(["touch", "d1/f1"])
+
+ self.assert_session_count(2, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.assert_session_count(2, mds_id=self.fs.get_rank(rank=1, status=status)['name'])
+
+ mount_a_client_id = self.mount_a.get_global_id()
+ self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id],
+ mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.wait_until_true(lambda: self.mds_cluster.is_addr_blocklisted(
+ self.mount_a.get_global_addr()), timeout=30)
+
+ # 10 seconds should be enough for evicting client
+ time.sleep(10)
+ self.assert_session_count(1, mds_id=self.fs.get_rank(rank=0, status=status)['name'])
+ self.assert_session_count(1, mds_id=self.fs.get_rank(rank=1, status=status)['name'])
+
+ self.mount_a.kill_cleanup()
+ self.mount_a.mount_wait()
diff --git a/qa/tasks/cephfs/test_snap_schedules.py b/qa/tasks/cephfs/test_snap_schedules.py
new file mode 100644
index 000000000..0264cac32
--- /dev/null
+++ b/qa/tasks/cephfs/test_snap_schedules.py
@@ -0,0 +1,607 @@
+import os
+import json
+import time
+import errno
+import logging
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+from datetime import datetime, timedelta
+
+log = logging.getLogger(__name__)
+
+def extract_schedule_and_retention_spec(spec=[]):
+ schedule = set([s[0] for s in spec])
+ retention = set([s[1] for s in spec])
+ return (schedule, retention)
+
+def seconds_upto_next_schedule(time_from, timo):
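+ # seconds from time_from until `timo` seconds past the most recent whole
+ # minute, e.g. time_from=1000 and timo=60 gives ((16 * 60) + 60) - 1000 = 20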
+ ts = int(time_from)
+ return ((int(ts / 60) * 60) + timo) - ts
+
+class TestSnapSchedulesHelper(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+
+ TEST_VOLUME_NAME = 'snap_vol'
+ TEST_DIRECTORY = 'snap_test_dir1'
+
+ # this should be in sync with snap_schedule format
+ SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S'
+
+ def check_scheduled_snapshot(self, exec_time, timo):
+ now = time.time()
+ delta = now - exec_time
+ log.debug(f'exec={exec_time}, now = {now}, timo = {timo}')
+ # tolerate snapshot existence in the range [-5,+5]
+ self.assertTrue((delta <= timo + 5) and (delta >= timo - 5))
+
+ def _fs_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)
+
+ def fs_snap_schedule_cmd(self, *args, **kwargs):
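+ # positional args are passed through verbatim, the 'fs' and 'format' kwargs
+ # become --fs/--format options, and any other kwarg contributes only its
+ # value; e.g. fs_snap_schedule_cmd('add', path='/dir', snap_schedule='1M')
+ # ends up running `ceph fs snap-schedule add /dir 1M`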
+ if 'fs' in kwargs:
+ fs = kwargs.pop('fs')
+ args += ('--fs', fs)
+ if 'format' in kwargs:
+ fmt = kwargs.pop('format')
+ args += ('--format', fmt)
+ for name, val in kwargs.items():
+ args += (str(val),)
+ res = self._fs_cmd('snap-schedule', *args)
+ log.debug(f'res={res}')
+ return res
+
+ def _create_or_reuse_test_volume(self):
+ result = json.loads(self._fs_cmd("volume", "ls"))
+ if len(result) == 0:
+ self.vol_created = True
+ self.volname = TestSnapSchedulesHelper.TEST_VOLUME_NAME
+ self._fs_cmd("volume", "create", self.volname)
+ else:
+ self.volname = result[0]['name']
+
+ def _enable_snap_schedule(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule")
+
+ def _disable_snap_schedule(self):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule")
+
+ def _allow_minute_granularity_snapshots(self):
+ self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True)
+
+ def _dump_on_update(self):
+ self.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True)
+
+ def setUp(self):
+ super(TestSnapSchedulesHelper, self).setUp()
+ self.volname = None
+ self.vol_created = False
+ self._create_or_reuse_test_volume()
+ self.create_cbks = []
+ self.remove_cbks = []
+ # used to figure out which snapshots are created/deleted
+ self.snapshots = set()
+ self._enable_snap_schedule()
+ self._allow_minute_granularity_snapshots()
+ self._dump_on_update()
+
+ def tearDown(self):
+ if self.vol_created:
+ self._delete_test_volume()
+ self._disable_snap_schedule()
+ super(TestSnapSchedulesHelper, self).tearDown()
+
+ def _schedule_to_timeout(self, schedule):
+ mult = schedule[-1]
+ period = int(schedule[0:-1])
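+ # in this helper 'M' means minutes: e.g. '1M' -> 60s, '2h' -> 7200s,
+ # '1d' -> 86400s, '1w' -> 604800s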
+ if mult == 'M':
+ return period * 60
+ elif mult == 'h':
+ return period * 60 * 60
+ elif mult == 'd':
+ return period * 60 * 60 * 24
+ elif mult == 'w':
+ return period * 60 * 60 * 24 * 7
+ else:
+ raise RuntimeError('schedule multiplier not recognized')
+
+ def add_snap_create_cbk(self, cbk):
+ self.create_cbks.append(cbk)
+ def remove_snap_create_cbk(self, cbk):
+ self.create_cbks.remove(cbk)
+
+ def add_snap_remove_cbk(self, cbk):
+ self.remove_cbks.append(cbk)
+ def remove_snap_remove_cbk(self, cbk):
+ self.remove_cbks.remove(cbk)
+
+ def assert_if_not_verified(self):
+ self.assertListEqual(self.create_cbks, [])
+ self.assertListEqual(self.remove_cbks, [])
+
+ def verify(self, dir_path, max_trials):
+ trials = 0
+ snap_path = f'{dir_path}/.snap'
+ while (len(self.create_cbks) or len(self.remove_cbks)) and trials < max_trials:
+ snapshots = set(self.mount_a.ls(path=snap_path))
+ log.info(f'snapshots: {snapshots}')
+ added = snapshots - self.snapshots
+ log.info(f'added: {added}')
+ removed = self.snapshots - snapshots
+ log.info(f'removed: {removed}')
+ if added:
+ for cbk in list(self.create_cbks):
+ res = cbk(list(added))
+ if res:
+ self.remove_snap_create_cbk(cbk)
+ break
+ if removed:
+ for cbk in list(self.remove_cbks):
+ res = cbk(list(removed))
+ if res:
+ self.remove_snap_remove_cbk(cbk)
+ break
+ self.snapshots = snapshots
+ trials += 1
+ time.sleep(1)
+
+ def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule):
+ timo = self._schedule_to_timeout(schedule)
+ # calculate wait time upto the next minute
+ wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo)
+
+ # expected "scheduled" snapshot name
+ ts_name = (datetime.utcfromtimestamp(snap_sched_exec_epoch)
+ + timedelta(seconds=wait_timo)).strftime(TestSnapSchedulesHelper.SNAPSHOT_TS_FORMAT)
+ return (wait_timo, ts_name)
+
+ def verify_schedule(self, dir_path, schedules, retentions=[]):
+ log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}')
+
+ result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json')
+ json_res = json.loads(result)
+ log.debug(f'json_res: {json_res}')
+
+ for schedule in schedules:
+ self.assertTrue(schedule in json_res['schedule'])
+ for retention in retentions:
+ self.assertTrue(retention in json_res['retention'])
+
+class TestSnapSchedules(TestSnapSchedulesHelper):
+ def remove_snapshots(self, dir_path):
+ snap_path = f'{dir_path}/.snap'
+
+ snapshots = self.mount_a.ls(path=snap_path)
+ for snapshot in snapshots:
+ snapshot_path = os.path.join(snap_path, snapshot)
+ log.debug(f'removing snapshot: {snapshot_path}')
+ self.mount_a.run_shell(['rmdir', snapshot_path])
+
+ def test_non_existent_snap_schedule_list(self):
+ """Test listing snap schedules on a non-existing filesystem path failure"""
+ try:
+ self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
+ else:
+ raise RuntimeError('expected "fs snap-schedule list" to fail')
+
+ def test_non_existent_schedule(self):
+ """Test listing non-existing snap schedules failure"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ try:
+ self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
+ else:
+ raise RuntimeError('expected "fs snap-schedule list" to fail')
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
+
+ def test_snap_schedule_list_post_schedule_remove(self):
+ """Test listing snap schedules post removal of a schedule"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h')
+
+ self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)
+
+ try:
+ self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise RuntimeError('incorrect errno when listing a non-existing snap schedule')
+ else:
+ raise RuntimeError('"fs snap-schedule list" returned error')
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
+
+ def test_snap_schedule(self):
+ """Test existence of a scheduled snapshot"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ # set a schedule on the dir
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
+ exec_time = time.time()
+
+ timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+ log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...')
+ to_wait = timo + 2 # some leeway to avoid false failures...
+
+ # verify snapshot schedule
+ self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'])
+
+ def verify_added(snaps_added):
+ log.debug(f'snapshots added={snaps_added}')
+ self.assertEqual(len(snaps_added), 1)
+ snapname = snaps_added[0]
+ if snapname.startswith('scheduled-'):
+ if snapname[10:26] == snap_sfx[:16]:
+ self.check_scheduled_snapshot(exec_time, timo)
+ return True
+ return False
+ self.add_snap_create_cbk(verify_added)
+ self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
+ self.assert_if_not_verified()
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)
+
+ # remove all scheduled snapshots
+ self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
+
+ def test_multi_snap_schedule(self):
+ """Test exisitence of multiple scheduled snapshots"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ # set schedules on the dir
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M')
+ exec_time = time.time()
+
+ timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M')
+ log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...')
+ timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M')
+ log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...')
+ to_wait = timo_2 + 2 # use max timeout
+
+ # verify snapshot schedule
+ self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M'])
+
+ def verify_added_1(snaps_added):
+ log.debug(f'snapshots added={snaps_added}')
+ self.assertEqual(len(snaps_added), 1)
+ snapname = snaps_added[0]
+ if snapname.startswith('scheduled-'):
+ if snapname[10:26] == snap_sfx_1[:16]:
+ self.check_scheduled_snapshot(exec_time, timo_1)
+ return True
+ return False
+ def verify_added_2(snaps_added):
+ log.debug(f'snapshots added={snaps_added}')
+ self.assertEqual(len(snaps_added), 1)
+ snapname = snaps_added[0]
+ if snapname.startswith('scheduled-'):
+ if snapname[10:26] == snap_sfx_2[:16]:
+ self.check_scheduled_snapshot(exec_time, timo_2)
+ return True
+ return False
+ self.add_snap_create_cbk(verify_added_1)
+ self.add_snap_create_cbk(verify_added_2)
+ self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
+ self.assert_if_not_verified()
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)
+
+ # remove all scheduled snapshots
+ self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
+
+ def test_snap_schedule_with_retention(self):
+ """Test scheduled snapshots along with rentention policy"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ # set a schedule on the dir
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M')
+ self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M')
+ exec_time = time.time()
+
+ timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+ log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...')
+ to_wait = timo_1 + 2 # some leeway to avoid false failures...
+
+ # verify snapshot schedule
+ self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}])
+
+ def verify_added(snaps_added):
+ log.debug(f'snapshots added={snaps_added}')
+ self.assertEqual(len(snaps_added), 1)
+ snapname = snaps_added[0]
+ if snapname.startswith('scheduled-'):
+ if snapname[10:26] == snap_sfx[:16]:
+ self.check_scheduled_snapshot(exec_time, timo_1)
+ return True
+ return False
+ self.add_snap_create_cbk(verify_added)
+ self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait)
+ self.assert_if_not_verified()
+
+ timo_2 = timo_1 + 60 # expected snapshot removal timeout
+ def verify_removed(snaps_removed):
+ log.debug(f'snapshots removed={snaps_removed}')
+ self.assertEqual(len(snaps_removed), 1)
+ snapname = snaps_removed[0]
+ if snapname.startswith('scheduled-'):
+ if snapname[10:26] == snap_sfx[:16]:
+ self.check_scheduled_snapshot(exec_time, timo_2)
+ return True
+ return False
+ log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...')
+ to_wait = timo_2
+ self.add_snap_remove_cbk(verify_removed)
+ self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait+2)
+ self.assert_if_not_verified()
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY)
+
+ # remove all scheduled snapshots
+ self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY)
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY])
+
+ def get_snap_stats(self, dir_path):
+ snap_path = f"{dir_path}/.snap"[1:]
+ snapshots = self.mount_a.ls(path=snap_path)
+ fs_count = len(snapshots)
+ log.debug(f'snapshots: {snapshots}')
+
+ result = self.fs_snap_schedule_cmd('status', path=dir_path,
+ format='json')
+ json_res = json.loads(result)[0]
+ db_count = int(json_res['created_count'])
+ log.debug(f'json_res: {json_res}')
+
+ snap_stats = dict()
+ snap_stats['fs_count'] = fs_count
+ snap_stats['db_count'] = db_count
+
+ log.debug(f'fs_count: {fs_count}')
+ log.debug(f'db_count: {db_count}')
+
+ return snap_stats
+
+ def verify_snap_stats(self, dir_path):
+ snap_stats = self.get_snap_stats(dir_path)
+ self.assertTrue(snap_stats['fs_count'] == snap_stats['db_count'])
+
+ def test_concurrent_snap_creates(self):
+ """Test concurrent snap creates in same file-system without db issues"""
+ """
+ Test snap creates at same cadence on same fs to verify correct stats.
+ A single SQLite DB Connection handle cannot be used to run concurrent
+ transactions and results transaction aborts. This test makes sure that
+ proper care has been taken in the code to avoid such situation by
+ verifying number of dirs created on the file system with the
+ created_count in the schedule_meta table for the specific path.
+ """
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+
+ testdirs = []
+ for d in range(10):
+ testdirs.append(os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d)))
+
+ for d in testdirs:
+ self.mount_a.run_shell(['mkdir', '-p', d[1:]])
+ self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M')
+
+ exec_time = time.time()
+ timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+
+ for d in testdirs:
+ self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M')
+
+ # we wait for 10 snaps to be taken
+ wait_time = timo_1 + 10 * 60 + 15
+ time.sleep(wait_time)
+
+ for d in testdirs:
+ self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M')
+
+ for d in testdirs:
+ self.verify_snap_stats(d)
+
+ for d in testdirs:
+ self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M')
+ self.remove_snapshots(d[1:])
+ self.mount_a.run_shell(['rmdir', d[1:]])
+
+ def test_snap_schedule_with_mgr_restart(self):
+ """Test that snap schedule is resumed after mgr restart"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY])
+ testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart")
+ self.mount_a.run_shell(['mkdir', '-p', testdir[1:]])
+ self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M')
+
+ exec_time = time.time()
+ timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+
+ self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M')
+
+ # we wait for 10 snaps to be taken
+ wait_time = timo_1 + 10 * 60 + 15
+ time.sleep(wait_time)
+
+ old_stats = self.get_snap_stats(testdir)
+ self.assertTrue(old_stats['fs_count'] == old_stats['db_count'])
+ self.assertTrue(old_stats['fs_count'] > 9)
+
+ # restart mgr
+ active_mgr = self.mgr_cluster.mon_manager.get_mgr_dump()['active_name']
+ log.debug(f'restarting active mgr: {active_mgr}')
+ self.mgr_cluster.mon_manager.revive_mgr(active_mgr)
+ time.sleep(300) # sleep for 5 minutes
+ self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M')
+
+ new_stats = self.get_snap_stats(testdir)
+ self.assertTrue(new_stats['fs_count'] == new_stats['db_count'])
+ self.assertTrue(new_stats['fs_count'] > old_stats['fs_count'])
+ self.assertTrue(new_stats['db_count'] > old_stats['db_count'])
+
+ # cleanup
+ self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M')
+ self.remove_snapshots(testdir[1:])
+ self.mount_a.run_shell(['rmdir', testdir[1:]])
+
+ def test_schedule_auto_deactivation_for_non_existent_path(self):
+ """
+ Test that a non-existent path leads to schedule deactivation after a few retries.
+ """
+ self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1M')
+ start_time = time.time()
+
+ while time.time() - start_time < 60.0:
+ s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json')
+ json_status = json.loads(s)[0]
+
+ self.assertTrue(int(json_status['active']) == 1)
+ time.sleep(60)
+
+ s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json')
+ json_status = json.loads(s)[0]
+ self.assertTrue(int(json_status['active']) == 0)
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path="/bad-path")
+
+ def test_snap_schedule_for_number_of_snaps_retention(self):
+ """
+ Test that number of snaps retained are as per user spec.
+ """
+ total_snaps = 55
+ test_dir = '/' + TestSnapSchedules.TEST_DIRECTORY
+
+ self.mount_a.run_shell(['mkdir', '-p', test_dir[1:]])
+
+ # set a schedule on the dir
+ self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M')
+ self.fs_snap_schedule_cmd('retention', 'add', path=test_dir,
+ retention_spec_or_period=f'{total_snaps}n')
+ exec_time = time.time()
+
+ timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+
+ # verify snapshot schedule
+ self.verify_schedule(test_dir, ['1M'])
+
+ # we wait for total_snaps snaps to be taken
+ wait_time = timo_1 + total_snaps * 60 + 15
+ time.sleep(wait_time)
+
+ snap_stats = self.get_snap_stats(test_dir)
+ self.assertTrue(snap_stats['fs_count'] == total_snaps)
+ self.assertTrue(snap_stats['db_count'] >= total_snaps)
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path=test_dir)
+
+ # remove all scheduled snapshots
+ self.remove_snapshots(test_dir[1:])
+
+ self.mount_a.run_shell(['rmdir', test_dir[1:]])
+
+
+class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper):
+ def remove_snapshots(self, dir_path, sdn):
+ snap_path = f'{dir_path}/{sdn}'
+
+ snapshots = self.mount_a.ls(path=snap_path)
+ for snapshot in snapshots:
+ snapshot_path = os.path.join(snap_path, snapshot)
+ log.debug(f'removing snapshot: {snapshot_path}')
+ self.mount_a.run_shell(['rmdir', snapshot_path])
+
+ def get_snap_dir_name(self):
+ from tasks.cephfs.fuse_mount import FuseMount
+ from tasks.cephfs.kernel_mount import KernelMount
+
+ if isinstance(self.mount_a, KernelMount):
+ sdn = self.mount_a.client_config.get('snapdirname', '.snap')
+ elif isinstance(self.mount_a, FuseMount):
+ sdn = self.mount_a.client_config.get('client_snapdir', '.snap')
+ self.fs.set_ceph_conf('client', 'client snapdir', sdn)
+ self.mount_a.remount()
+ return sdn
+
+ def test_snap_dir_name(self):
+ """Test the correctness of snap directory name"""
+ self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY])
+
+ # set a schedule on the dir
+ self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M')
+ self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M')
+ exec_time = time.time()
+
+ timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M')
+ sdn = self.get_snap_dir_name()
+ log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...')
+
+ # verify snapshot schedule
+ self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}])
+
+ # remove snapshot schedule
+ self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY)
+
+ # remove all scheduled snapshots
+ self.remove_snapshots(TestSnapSchedulesSnapdir.TEST_DIRECTORY, sdn)
+
+ self.mount_a.run_shell(['rmdir', TestSnapSchedulesSnapdir.TEST_DIRECTORY])
+
+
+"""
+Note that the class TestSnapSchedulesMandatoryFSArgument tests snap-schedule
+commands only for multi-fs scenario. Commands for a single default fs should
+pass for tests defined above or elsewhere.
+"""
+
+
+class TestSnapSchedulesMandatoryFSArgument(TestSnapSchedulesHelper):
+ REQUIRE_BACKUP_FILESYSTEM = True
+ TEST_DIRECTORY = 'mandatory_fs_argument_test_dir'
+
+ def test_snap_schedule_without_fs_argument(self):
+ """Test command fails without --fs argument in presence of multiple fs"""
+ test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY
+ self.mount_a.run_shell(['mkdir', '-p', test_path])
+
+ # try setting a schedule on the dir; this should fail now that we are
+ # working with multiple fs; we need the --fs argument if there are more
+ # than one fs hosted by the same cluster
+ with self.assertRaises(CommandFailedError):
+ self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M')
+
+ self.mount_a.run_shell(['rmdir', test_path])
+
+ def test_snap_schedule_for_non_default_fs(self):
+ """Test command succes with --fs argument for non-default fs"""
+ test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY
+ self.mount_a.run_shell(['mkdir', '-p', test_path])
+
+ # use the backup fs as the second fs; all these commands must pass
+ self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M', fs='backup_fs')
+ self.fs_snap_schedule_cmd('activate', test_path, snap_schedule='1M', fs='backup_fs')
+ self.fs_snap_schedule_cmd('retention', 'add', test_path, retention_spec_or_period='1M', fs='backup_fs')
+ self.fs_snap_schedule_cmd('list', test_path, fs='backup_fs', format='json')
+ self.fs_snap_schedule_cmd('status', test_path, fs='backup_fs', format='json')
+ self.fs_snap_schedule_cmd('retention', 'remove', test_path, retention_spec_or_period='1M', fs='backup_fs')
+ self.fs_snap_schedule_cmd('deactivate', test_path, snap_schedule='1M', fs='backup_fs')
+ self.fs_snap_schedule_cmd('remove', test_path, snap_schedule='1M', fs='backup_fs')
+
+ self.mount_a.run_shell(['rmdir', test_path])
diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py
new file mode 100644
index 000000000..608dcc81f
--- /dev/null
+++ b/qa/tasks/cephfs/test_snapshots.py
@@ -0,0 +1,605 @@
+import errno
+import logging
+import signal
+from textwrap import dedent
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.orchestra.run import Raw
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+MDS_RESTART_GRACE = 60
+
+class TestSnapshots(CephFSTestCase):
+ MDSS_REQUIRED = 3
+ LOAD_SETTINGS = ["mds_max_snaps_per_dir"]
+
+ def _check_subtree(self, rank, path, status=None):
+ got_subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status)
+ for s in got_subtrees:
+ if s['dir']['path'] == path and s['auth_first'] == rank:
+ return True
+ return False
+
+ def _get_snapclient_dump(self, rank=0, status=None):
+ return self.fs.rank_asok(["dump", "snaps"], rank=rank, status=status)
+
+ def _get_snapserver_dump(self, rank=0, status=None):
+ return self.fs.rank_asok(["dump", "snaps", "--server"], rank=rank, status=status)
+
+ def _get_last_created_snap(self, rank=0, status=None):
+ return int(self._get_snapserver_dump(rank,status=status)["last_created"])
+
+ def _get_last_destroyed_snap(self, rank=0, status=None):
+ return int(self._get_snapserver_dump(rank,status=status)["last_destroyed"])
+
+ def _get_pending_snap_update(self, rank=0, status=None):
+ return self._get_snapserver_dump(rank,status=status)["pending_update"]
+
+ def _get_pending_snap_destroy(self, rank=0, status=None):
+ return self._get_snapserver_dump(rank,status=status)["pending_destroy"]
+
+ def test_allow_new_snaps_config(self):
+ """
+ Check whether 'allow_new_snaps' setting works
+ """
+ self.mount_a.run_shell(["mkdir", "test-allow-snaps"])
+
+ self.fs.set_allow_new_snaps(False);
+ try:
+ self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"])
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM, "expected EPERM")
+ else:
+ self.fail("expected snap creatiion to fail")
+
+ self.fs.set_allow_new_snaps(True);
+ self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"])
+ self.mount_a.run_shell(["rmdir", "test-allow-snaps/.snap/snap00"])
+ self.mount_a.run_shell(["rmdir", "test-allow-snaps"])
+
+ def test_kill_mdstable(self):
+ """
+ Check snaptable transactions
+ """
+ if not isinstance(self.mount_a, FuseMount):
+ self.skipTest("Require FUSE client to forcibly kill mount")
+
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ # setup subtrees
+ self.mount_a.run_shell(["mkdir", "-p", "d1/dir"])
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/d1", 1)], rank=1, path="/d1")
+
+ last_created = self._get_last_created_snap(rank=0,status=status)
+
+ # mds_kill_mdstable_at:
+ # 1: MDSTableServer::handle_prepare
+ # 2: MDSTableServer::_prepare_logged
+ # 5: MDSTableServer::handle_commit
+ # 6: MDSTableServer::_commit_logged
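+ # each of these debug hooks makes the MDS abort at the named code path, so
+ # every iteration expects the rank to go laggy and leave a core dump behind
+ # (cleaned up via delete_mds_coredump) before it is restarted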
+ for i in [1,2,5,6]:
+ log.info("testing snapserver mds_kill_mdstable_at={0}".format(i))
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ self.fs.rank_freeze(True, rank=0)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank0['name']);
+
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+ status = self.fs.wait_for_daemons()
+
+ proc.wait()
+ last_created += 1
+ self.wait_until_true(lambda: self._get_last_created_snap(rank=0) == last_created, timeout=30)
+
+ self.set_conf("mds", "mds_reconnect_timeout", "5")
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # set mds_kill_mdstable_at, also kill snapclient
+ for i in [2,5,6]:
+ log.info("testing snapserver mds_kill_mdstable_at={0}, also kill snapclient".format(i))
+ status = self.fs.status()
+ last_created = self._get_last_created_snap(rank=0, status=status)
+
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=0) # prevent failover...
+ self.fs.rank_freeze(True, rank=1) # prevent failover...
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank0['name']);
+
+ self.fs.rank_signal(signal.SIGKILL, rank=1)
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+
+ self.fs.wait_for_state('up:resolve', rank=0, timeout=MDS_RESTART_GRACE)
+ if i in [2,5]:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+ elif i == 6:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+
+ if i in [2,5]:
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ if i == 2:
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+ else:
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount_wait()
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # mds_kill_mdstable_at:
+ # 3: MDSTableClient::handle_request (got agree)
+ # 4: MDSTableClient::commit
+ # 7: MDSTableClient::handle_request (got ack)
+ for i in [3,4,7]:
+ log.info("testing snapclient mds_kill_mdstable_at={0}".format(i))
+ last_created = self._get_last_created_snap(rank=0)
+
+ status = self.fs.status()
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=1) # prevent failover...
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=1, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank1['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ if i in [3,4]:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+ elif i == 7:
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ if i in [3,4]:
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ if i == 3:
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+ else:
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount_wait()
+
+ self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")])
+
+ # mds_kill_mdstable_at:
+ # 3: MDSTableClient::handle_request (got agree)
+ # 8: MDSTableServer::handle_rollback
+ log.info("testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8")
+ last_created = self._get_last_created_snap(rank=0)
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+ rank1 = self.fs.get_rank(rank=1, status=status)
+ self.fs.rank_freeze(True, rank=0)
+ self.fs.rank_freeze(True, rank=1)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "8"], rank=0, status=status)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "3"], rank=1, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank1['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+
+ self.fs.rank_fail(rank=1)
+ self.fs.mds_restart(rank1['name'])
+ self.wait_for_daemon_start([rank1['name']])
+
+ # rollback triggers assertion
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank0['name']);
+ self.fs.rank_fail(rank=0)
+ self.fs.mds_restart(rank0['name'])
+ self.wait_for_daemon_start([rank0['name']])
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+
+ # mds.1 should re-send rollback message
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ self.assertEqual(self._get_last_created_snap(rank=0), last_created)
+
+ self.mount_a.mount_wait()
+
+ def test_snapclient_cache(self):
+ """
+ check if snapclient cache gets synced properly
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(3)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"])
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1")
+ self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2")
+ self._wait_subtrees([("/d0", 0), ("/d0/d1", 1), ("/d0/d2", 2)], rank="all", status=status, path="/d0")
+
+ def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0):
+ if cache_dump is None:
+ cache_dump = self._get_snapclient_dump(rank=rank)
+ for key, value in cache_dump.items():
+ if value != snaps_dump[key]:
+ return False
+ return True;
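+        # Note on the helper above: only entries present in the snapclient
+        # cache are compared, so a client that has not yet cached every
+        # server-side entry still counts as in sync with the snapserver dump.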
+
+ # sync after mksnap
+ last_created = self._get_last_created_snap(rank=0)
+ self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"])
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+ self.assertGreater(self._get_last_created_snap(rank=0), last_created)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # sync after rmsnap
+ last_destroyed = self._get_last_destroyed_snap(rank=0)
+ self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"])
+ self.wait_until_true(lambda: len(self._get_pending_snap_destroy(rank=0)) == 0, timeout=30)
+ self.assertGreater(self._get_last_destroyed_snap(rank=0), last_destroyed)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # sync during mds recovers
+ self.fs.rank_fail(rank=2)
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ self.fs.rank_fail(rank=0)
+ self.fs.rank_fail(rank=1)
+ status = self.fs.wait_for_daemons()
+ self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE)
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1));
+ self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2));
+
+ # kill at MDSTableClient::handle_notify_prep
+ status = self.fs.status()
+ rank2 = self.fs.get_rank(rank=2, status=status)
+ self.fs.rank_freeze(True, rank=2)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"], rank=2, status=status)
+ proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank2['name']);
+
+ # mksnap should wait for notify ack from mds.2
+ self.assertFalse(proc.finished);
+
+ # mksnap should proceed after mds.2 fails
+ self.fs.rank_fail(rank=2)
+ self.wait_until_true(lambda: proc.finished, timeout=30);
+
+ self.fs.mds_restart(rank2['name'])
+ self.wait_for_daemon_start([rank2['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")])
+
+ # kill at MDSTableClient::commit
+        # the recovering mds should sync all MDSs' snap caches when it enters the resolve stage
+ self.set_conf("mds", "mds_reconnect_timeout", "5")
+ for i in range(1, 4):
+ status = self.fs.status()
+ rank2 = self.fs.get_rank(rank=2, status=status)
+ self.fs.rank_freeze(True, rank=2)
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"], rank=2, status=status)
+ last_created = self._get_last_created_snap(rank=0)
+ proc = self.mount_a.run_shell(["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False)
+ self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout);
+ self.delete_mds_coredump(rank2['name']);
+
+ self.mount_a.kill()
+ self.mount_a.kill_cleanup()
+
+ self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1)
+
+ if i in [2,4]:
+ self.fs.rank_fail(rank=0)
+ if i in [3,4]:
+ self.fs.rank_fail(rank=1)
+
+ self.fs.rank_fail(rank=2)
+ self.fs.mds_restart(rank2['name'])
+ self.wait_for_daemon_start([rank2['name']])
+ status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE)
+
+ rank0_cache = self._get_snapclient_dump(rank=0)
+ rank1_cache = self._get_snapclient_dump(rank=1)
+ rank2_cache = self._get_snapclient_dump(rank=2)
+
+ self.assertGreater(int(rank0_cache["last_created"]), last_created)
+ self.assertEqual(rank0_cache, rank1_cache);
+ self.assertEqual(rank0_cache, rank2_cache);
+
+ self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30)
+
+ snaps_dump = self._get_snapserver_dump(rank=0)
+ self.assertEqual(snaps_dump["last_created"], rank0_cache["last_created"])
+ self.assertTrue(_check_snapclient_cache(snaps_dump, cache_dump=rank0_cache));
+
+ self.mount_a.mount_wait()
+
+ self.mount_a.run_shell(["rmdir", Raw("d0/d2/dir/.snap/*")])
+
+ def test_multimds_mksnap(self):
+ """
+ check if snapshot takes effect across authority subtrees
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell(["mkdir", "-p", "d0/d1/empty"])
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/d0", 0), ("/d0/d1", 1)], rank="all", status=status, path="/d0")
+
+ self.mount_a.write_test_pattern("d0/d1/file_a", 8 * 1024 * 1024)
+ self.mount_a.run_shell(["mkdir", "d0/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-f", "d0/d1/file_a"])
+ self.mount_a.validate_test_pattern("d0/.snap/s1/d1/file_a", 8 * 1024 * 1024)
+
+ self.mount_a.run_shell(["rmdir", "d0/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-rf", "d0"])
+
+ def test_multimds_past_parents(self):
+ """
+        check that past parents are properly recorded during a cross-authority rename
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty")
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status)
+
+ self.mount_a.run_shell(["mkdir", "d0/d3"])
+ self.mount_a.run_shell(["mkdir", "d0/.snap/s1"])
+ snap_name = self.mount_a.run_shell(["ls", "d0/d3/.snap"]).stdout.getvalue()
+
+ self.mount_a.run_shell(["mv", "d0/d3", "d1/d3"])
+ snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue()
+ self.assertEqual(snap_name1, snap_name);
+
+ self.mount_a.run_shell(["rmdir", "d0/.snap/s1"])
+ snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue()
+ self.assertEqual(snap_name1, "");
+
+ self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
+
+ def test_multimds_hardlink(self):
+ """
+ check if hardlink snapshot works in multimds setup
+ """
+ self.fs.set_allow_new_snaps(True);
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty")
+
+ self.mount_a.setfattr("d0", "ceph.dir.pin", "0")
+ self.mount_a.setfattr("d1", "ceph.dir.pin", "1")
+ self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status)
+
+ self.mount_a.run_python(dedent("""
+ import os
+ open(os.path.join("{path}", "d0/file1"), 'w').write("asdf")
+ open(os.path.join("{path}", "d0/file2"), 'w').write("asdf")
+ """.format(path=self.mount_a.mountpoint)
+ ))
+
+ self.mount_a.run_shell(["ln", "d0/file1", "d1/file1"])
+ self.mount_a.run_shell(["ln", "d0/file2", "d1/file2"])
+
+ self.mount_a.run_shell(["mkdir", "d1/.snap/s1"])
+
+ self.mount_a.run_python(dedent("""
+ import os
+ open(os.path.join("{path}", "d0/file1"), 'w').write("qwer")
+ """.format(path=self.mount_a.mountpoint)
+ ))
+
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file1"])
+
+ self.mount_a.run_shell(["rm", "-f", "d0/file2"])
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"])
+
+ self.mount_a.run_shell(["rm", "-f", "d1/file2"])
+ self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"])
+
+ self.mount_a.run_shell(["rmdir", "d1/.snap/s1"])
+ self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
+
+ class SnapLimitViolationException(Exception):
+ failed_snapshot_number = -1
+
+ def __init__(self, num):
+ self.failed_snapshot_number = num
+
+ def get_snap_name(self, dir_name, sno):
+ sname = "{dir_name}/.snap/s_{sno}".format(dir_name=dir_name, sno=sno)
+ return sname
+
+ def create_snap_dir(self, sname):
+ self.mount_a.run_shell(["mkdir", sname])
+
+ def delete_dir_and_snaps(self, dir_name, snaps):
+ for sno in range(1, snaps+1, 1):
+ sname = self.get_snap_name(dir_name, sno)
+ self.mount_a.run_shell(["rmdir", sname])
+ self.mount_a.run_shell(["rmdir", dir_name])
+
+ def create_dir_and_snaps(self, dir_name, snaps):
+ self.mount_a.run_shell(["mkdir", dir_name])
+
+ for sno in range(1, snaps+1, 1):
+ sname = self.get_snap_name(dir_name, sno)
+ try:
+ self.create_snap_dir(sname)
+ except CommandFailedError as e:
+ # failing at the last mkdir beyond the limit is expected
+ if sno == snaps:
+ log.info("failed while creating snap #{}: {}".format(sno, repr(e)))
+ raise TestSnapshots.SnapLimitViolationException(sno)
+
+ def test_mds_max_snaps_per_dir_default_limit(self):
+ """
+        Test the newly introduced option named mds_max_snaps_per_dir.
+        The default snapshot limit is 100.
+        Test that the default number of snapshots can be created.
+ """
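+        # A minimal sketch (not part of the original test) of how the active
+        # limit could be read back from a rank; the admin-socket "config get"
+        # command returns a JSON map keyed by the option name:
+        #
+        #   out = self.fs.rank_asok(['config', 'get', 'mds_max_snaps_per_dir'])
+        #   assert int(out['mds_max_snaps_per_dir']) == 100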
+ self.create_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir))
+ self.delete_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir))
+
+ def test_mds_max_snaps_per_dir_with_increased_limit(self):
+ """
+        Test the newly introduced option named mds_max_snaps_per_dir.
+        First create 101 snapshots and ensure that the 101st snapshot
+        creation fails. Then increase the limit by one and verify that the
+        additional snapshot creation succeeds.
+ """
+ # first test the default limit
+ new_limit = int(self.mds_max_snaps_per_dir)
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ try:
+ self.create_dir_and_snaps("accounts", new_limit + 1)
+ except TestSnapshots.SnapLimitViolationException as e:
+ if e.failed_snapshot_number == (new_limit + 1):
+ pass
+ # then increase the limit by one and test
+ new_limit = new_limit + 1
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ sname = self.get_snap_name("accounts", new_limit)
+ self.create_snap_dir(sname)
+ self.delete_dir_and_snaps("accounts", new_limit)
+
+ def test_mds_max_snaps_per_dir_with_reduced_limit(self):
+ """
+        Test the newly introduced option named mds_max_snaps_per_dir.
+        First create 99 snapshots. Then reduce the limit to 98 and verify
+        that creating one more snapshot fails.
+ """
+ # first test the new limit
+ new_limit = int(self.mds_max_snaps_per_dir) - 1
+ self.create_dir_and_snaps("accounts", new_limit)
+ sname = self.get_snap_name("accounts", new_limit + 1)
+ # then reduce the limit by one and test
+ new_limit = new_limit - 1
+ self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+ try:
+ self.create_snap_dir(sname)
+ except CommandFailedError:
+ # after reducing limit we expect the new snapshot creation to fail
+ pass
+ self.delete_dir_and_snaps("accounts", new_limit + 1)
+
+
+class TestMonSnapsAndFsPools(CephFSTestCase):
+ MDSS_REQUIRED = 3
+
+ def test_disallow_monitor_managed_snaps_for_fs_pools(self):
+ """
+ Test that creation of monitor managed snaps fails for pools attached
+ to any file-system
+ """
+ with self.assertRaises(CommandFailedError):
+ self.fs.rados(["mksnap", "snap1"], pool=self.fs.get_data_pool_name())
+
+ with self.assertRaises(CommandFailedError):
+ self.fs.rados(["mksnap", "snap2"], pool=self.fs.get_metadata_pool_name())
+
+ with self.assertRaises(CommandFailedError):
+ test_pool_name = self.fs.get_data_pool_name()
+ base_cmd = f'osd pool mksnap {test_pool_name} snap3'
+ self.run_cluster_cmd(base_cmd)
+
+ with self.assertRaises(CommandFailedError):
+ test_pool_name = self.fs.get_metadata_pool_name()
+ base_cmd = f'osd pool mksnap {test_pool_name} snap4'
+ self.run_cluster_cmd(base_cmd)
+
+ def test_attaching_pools_with_snaps_to_fs_fails(self):
+ """
+        Test that an attempt to attach a pool with snapshots to an fs fails
+ """
+ test_pool_name = 'snap-test-pool'
+ base_cmd = f'osd pool create {test_pool_name}'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ self.assertEqual(ret, 0)
+
+ self.fs.rados(["mksnap", "snap3"], pool=test_pool_name)
+
+ base_cmd = f'fs add_data_pool {self.fs.name} {test_pool_name}'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ self.assertEqual(ret, errno.EOPNOTSUPP)
+
+ # cleanup
+ self.fs.rados(["rmsnap", "snap3"], pool=test_pool_name)
+ base_cmd = f'osd pool delete {test_pool_name}'
+ ret = self.run_cluster_cmd_result(base_cmd)
+
+ def test_using_pool_with_snap_fails_fs_creation(self):
+ """
+ Test that using a pool with snaps for fs creation fails
+ """
+ base_cmd = 'osd pool create test_data_pool'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ self.assertEqual(ret, 0)
+ base_cmd = 'osd pool create test_metadata_pool'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ self.assertEqual(ret, 0)
+
+ self.fs.rados(["mksnap", "snap4"], pool='test_data_pool')
+
+ base_cmd = 'fs new testfs test_metadata_pool test_data_pool'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ self.assertEqual(ret, errno.EOPNOTSUPP)
+
+ # cleanup
+ self.fs.rados(["rmsnap", "snap4"], pool='test_data_pool')
+ base_cmd = 'osd pool delete test_data_pool'
+ ret = self.run_cluster_cmd_result(base_cmd)
+ base_cmd = 'osd pool delete test_metadata_pool'
+ ret = self.run_cluster_cmd_result(base_cmd)
diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py
new file mode 100644
index 000000000..8bdc126e2
--- /dev/null
+++ b/qa/tasks/cephfs/test_strays.py
@@ -0,0 +1,1027 @@
+import json
+import time
+import logging
+from textwrap import dedent
+import datetime
+import gevent
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra.run import Raw
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+
+log = logging.getLogger(__name__)
+
+
+class TestStrays(CephFSTestCase):
+ MDSS_REQUIRED = 2
+
+ OPS_THROTTLE = 1
+ FILES_THROTTLE = 2
+
+ # Range of different file sizes used in throttle test's workload
+ throttle_workload_size_range = 16
+
+ @for_teuthology
+ def test_ops_throttle(self):
+ self._test_throttling(self.OPS_THROTTLE)
+
+ @for_teuthology
+ def test_files_throttle(self):
+ self._test_throttling(self.FILES_THROTTLE)
+
+ def test_dir_deletion(self):
+ """
+ That when deleting a bunch of dentries and the containing
+ directory, everything gets purged.
+ Catches cases where the client might e.g. fail to trim
+ the unlinked dir from its cache.
+ """
+ file_count = 1000
+ create_script = dedent("""
+ import os
+
+ mountpoint = "{mountpoint}"
+ subdir = "delete_me"
+ size = {size}
+ file_count = {file_count}
+ os.mkdir(os.path.join(mountpoint, subdir))
+ for i in range(0, file_count):
+ filename = "{{0}}_{{1}}.bin".format(i, size)
+ with open(os.path.join(mountpoint, subdir, filename), 'w') as f:
+ f.write(size * 'x')
+ """.format(
+ mountpoint=self.mount_a.mountpoint,
+ size=1024,
+ file_count=file_count
+ ))
+
+ self.mount_a.run_python(create_script)
+
+ # That the dirfrag object is created
+ self.fs.mds_asok(["flush", "journal"])
+ dir_ino = self.mount_a.path_to_ino("delete_me")
+ self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0))
+
+ # Remove everything
+ self.mount_a.run_shell(["rm", "-rf", "delete_me"])
+ self.fs.mds_asok(["flush", "journal"])
+
+        # That all the removed files (plus the containing directory) get created as strays
+ strays = self.get_mdc_stat("strays_created")
+ self.assertEqual(strays, file_count + 1)
+
+ # That the strays all get enqueued for purge
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("strays_enqueued"),
+ strays,
+            timeout=600
+        )
+
+ # That all the purge operations execute
+ self.wait_until_equal(
+ lambda: self.get_stat("purge_queue", "pq_executed"),
+ strays,
+ timeout=600
+ )
+
+ # That finally, the directory metadata object is gone
+ self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0))
+
+ # That finally, the data objects are all gone
+ self.await_data_pool_empty()
+
+ def _test_throttling(self, throttle_type):
+ self.data_log = []
+ try:
+ return self._do_test_throttling(throttle_type)
+ except:
+ for l in self.data_log:
+ log.info(",".join([l_.__str__() for l_ in l]))
+ raise
+
+ def _do_test_throttling(self, throttle_type):
+ """
+ That the mds_max_purge_ops setting is respected
+ """
+
+ def set_throttles(files, ops):
+ """
+ Helper for updating ops/files limits, and calculating effective
+ ops_per_pg setting to give the same ops limit.
+ """
+ self.set_conf('mds', 'mds_max_purge_files', "%d" % files)
+ self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops)
+
+ pgs = self.fs.mon_manager.get_pool_int_property(
+ self.fs.get_data_pool_name(),
+ "pg_num"
+ )
+ ops_per_pg = float(ops) / pgs
+ self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg)
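+            # Worked example with illustrative numbers (not from this run):
+            # ops=16 against a data pool with pg_num=8 yields
+            # ops_per_pg = 16 / 8 = 2.0, so the per-PG throttle allows the
+            # same aggregate purge ops as mds_max_purge_ops.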
+
+ # Test conditions depend on what we're going to be exercising.
+ # * Lift the threshold on whatever throttle we are *not* testing, so
+ # that the throttle of interest is the one that will be the bottleneck
+ # * Create either many small files (test file count throttling) or fewer
+ # large files (test op throttling)
+ if throttle_type == self.OPS_THROTTLE:
+ set_throttles(files=100000000, ops=16)
+ size_unit = 1024 * 1024 # big files, generate lots of ops
+ file_multiplier = 100
+ elif throttle_type == self.FILES_THROTTLE:
+ # The default value of file limit is pretty permissive, so to avoid
+ # the test running too fast, create lots of files and set the limit
+ # pretty low.
+ set_throttles(ops=100000000, files=6)
+ size_unit = 1024 # small, numerous files
+ file_multiplier = 200
+ else:
+ raise NotImplementedError(throttle_type)
+
+ # Pick up config changes
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+
+ create_script = dedent("""
+ import os
+
+ mountpoint = "{mountpoint}"
+ subdir = "delete_me"
+ size_unit = {size_unit}
+ file_multiplier = {file_multiplier}
+ os.mkdir(os.path.join(mountpoint, subdir))
+ for i in range(0, file_multiplier):
+ for size in range(0, {size_range}*size_unit, size_unit):
+ filename = "{{0}}_{{1}}.bin".format(i, size // size_unit)
+ with open(os.path.join(mountpoint, subdir, filename), 'w') as f:
+ f.write(size * 'x')
+ """.format(
+ mountpoint=self.mount_a.mountpoint,
+ size_unit=size_unit,
+ file_multiplier=file_multiplier,
+ size_range=self.throttle_workload_size_range
+ ))
+
+ self.mount_a.run_python(create_script)
+
+ # We will run the deletion in the background, to reduce the risk of it completing before
+ # we have started monitoring the stray statistics.
+ def background():
+ self.mount_a.run_shell(["rm", "-rf", "delete_me"])
+ self.fs.mds_asok(["flush", "journal"])
+
+ background_thread = gevent.spawn(background)
+
+ total_inodes = file_multiplier * self.throttle_workload_size_range + 1
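+        # The +1 accounts for the "delete_me" directory inode itself, which
+        # also becomes a stray when the tree is removed.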
+ mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds'))
+ mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds'))
+
+ # During this phase we look for the concurrent ops to exceed half
+ # the limit (a heuristic) and not exceed the limit (a correctness
+ # condition).
+ purge_timeout = 600
+ elapsed = 0
+ files_high_water = 0
+ ops_high_water = 0
+
+ while True:
+ stats = self.fs.mds_asok(['perf', 'dump'])
+ mdc_stats = stats['mds_cache']
+ pq_stats = stats['purge_queue']
+ if elapsed >= purge_timeout:
+ raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats))
+
+ num_strays = mdc_stats['num_strays']
+ num_strays_purging = pq_stats['pq_executing']
+ num_purge_ops = pq_stats['pq_executing_ops']
+ files_high_water = pq_stats['pq_executing_high_water']
+ ops_high_water = pq_stats['pq_executing_ops_high_water']
+
+ self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water])
+
+ total_strays_created = mdc_stats['strays_created']
+ total_strays_purged = pq_stats['pq_executed']
+
+ if total_strays_purged == total_inodes:
+ log.info("Complete purge in {0} seconds".format(elapsed))
+ break
+ elif total_strays_purged > total_inodes:
+ raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats))
+ else:
+ if throttle_type == self.OPS_THROTTLE:
+ # 11 is filer_max_purge_ops plus one for the backtrace:
+ # limit is allowed to be overshot by this much.
+ if num_purge_ops > mds_max_purge_ops + 11:
+ raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format(
+ num_purge_ops, mds_max_purge_ops
+ ))
+ elif throttle_type == self.FILES_THROTTLE:
+ if num_strays_purging > mds_max_purge_files:
+ raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format(
+ num_strays_purging, mds_max_purge_files
+ ))
+ else:
+ raise NotImplementedError(throttle_type)
+
+ log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format(
+ num_strays_purging, num_strays,
+ total_strays_purged, total_strays_created
+ ))
+ time.sleep(1)
+ elapsed += 1
+
+ background_thread.join()
+
+ # Check that we got up to a respectable rate during the purge. This is totally
+ # racy, but should be safeish unless the cluster is pathologically slow, or
+ # insanely fast such that the deletions all pass before we have polled the
+ # statistics.
+ if throttle_type == self.OPS_THROTTLE:
+ if ops_high_water < mds_max_purge_ops // 2:
+ raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format(
+ ops_high_water, mds_max_purge_ops
+ ))
+ # The MDS may go over mds_max_purge_ops for some items, like a
+ # heavily fragmented directory. The throttle does not kick in
+ # until *after* we reach or exceed the limit. This is expected
+ # because we don't want to starve the PQ or never purge a
+ # particularly large file/directory.
+ self.assertLessEqual(ops_high_water, mds_max_purge_ops+64)
+ elif throttle_type == self.FILES_THROTTLE:
+ if files_high_water < mds_max_purge_files // 2:
+ raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format(
+ files_high_water, mds_max_purge_files
+ ))
+ self.assertLessEqual(files_high_water, mds_max_purge_files)
+
+ # Sanity check all MDC stray stats
+ stats = self.fs.mds_asok(['perf', 'dump'])
+ mdc_stats = stats['mds_cache']
+ pq_stats = stats['purge_queue']
+ self.assertEqual(mdc_stats['num_strays'], 0)
+ self.assertEqual(mdc_stats['num_strays_delayed'], 0)
+ self.assertEqual(pq_stats['pq_executing'], 0)
+ self.assertEqual(pq_stats['pq_executing_ops'], 0)
+ self.assertEqual(mdc_stats['strays_created'], total_inodes)
+ self.assertEqual(mdc_stats['strays_enqueued'], total_inodes)
+ self.assertEqual(pq_stats['pq_executed'], total_inodes)
+
+ def get_mdc_stat(self, name, mds_id=None):
+ return self.get_stat("mds_cache", name, mds_id)
+
+ def get_stat(self, subsys, name, mds_id=None):
+ return self.fs.mds_asok(['perf', 'dump', subsys, name],
+ mds_id=mds_id)[subsys][name]
+
+ def _wait_for_counter(self, subsys, counter, expect_val, timeout=60,
+ mds_id=None):
+ self.wait_until_equal(
+ lambda: self.get_stat(subsys, counter, mds_id),
+ expect_val=expect_val, timeout=timeout,
+ reject_fn=lambda x: x > expect_val
+ )
+
+ def test_open_inode(self):
+ """
+ That the case of a dentry unlinked while a client holds an
+ inode open is handled correctly.
+
+ The inode should be moved into a stray dentry, while the original
+ dentry and directory should be purged.
+
+ The inode's data should be purged when the client eventually closes
+ it.
+ """
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # Write some bytes to a file
+ size_mb = 8
+
+ # Hold the file open
+ p = self.mount_a.open_background("open_file")
+ self.mount_a.write_n_mb("open_file", size_mb)
+ open_file_ino = self.mount_a.path_to_ino("open_file")
+
+ self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
+
+ # Unlink the dentry
+ self.mount_a.run_shell(["rm", "-f", "open_file"])
+
+ # Wait to see the stray count increment
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=1, timeout=60, reject_fn=lambda x: x > 1)
+
+ # See that while the stray count has incremented, none have passed
+ # on to the purge queue
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
+
+ # See that the client still holds 2 caps
+ self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2)
+
+ # See that the data objects remain in the data pool
+ self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024))
+
+ # Now close the file
+ self.mount_a.kill_background(p)
+
+ # Wait to see the client cap count decrement
+ self.wait_until_equal(
+ lambda: self.get_session(mount_a_client_id)['num_caps'],
+ expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1
+ )
+ # Wait to see the purge counter increment, stray count go to zero
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0, timeout=6, reject_fn=lambda x: x > 1
+ )
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ # See that the data objects no longer exist
+ self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024))
+
+ self.await_data_pool_empty()
+
+ def test_reintegration_limit(self):
+ """
+ That the reintegration is not blocked by full directories.
+ """
+
+ LOW_LIMIT = 50
+ self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT))
+ time.sleep(10) # for config to reach MDS; async create is fast!!
+
+ last_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.mount_a.run_shell_payload("""
+ mkdir a b
+ for i in `seq 1 50`; do
+ touch a/"$i"
+ ln a/"$i" b/"$i"
+ done
+ sync -f a b
+ rm a/*
+ """)
+
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertGreater(curr_reintegrated, last_reintegrated)
+
+
+ def test_hardlink_reintegration(self):
+ """
+        That removal of the primary dentry of a hardlinked inode results
+        in reintegration of the inode into the previously-remote dentry,
+ rather than lingering as a stray indefinitely.
+ """
+ # Write some bytes to file_a
+ size_mb = 8
+ self.mount_a.run_shell(["mkdir", "dir_1"])
+ self.mount_a.write_n_mb("dir_1/file_a", size_mb)
+ ino = self.mount_a.path_to_ino("dir_1/file_a")
+
+ # Create a hardlink named file_b
+ self.mount_a.run_shell(["mkdir", "dir_2"])
+ self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"])
+ self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino)
+
+ # Flush journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that backtrace for the file points to the file_a path
+ pre_unlink_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a")
+
+ # empty mds cache. otherwise mds reintegrates stray when unlink finishes
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(['flush', 'journal'])
+ self.fs.mds_fail_restart()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+
+ # Unlink file_a
+ self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"])
+
+ # See that a stray was created
+ self.assertEqual(self.get_mdc_stat("num_strays"), 1)
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+
+ # Wait, see that data objects are still present (i.e. that the
+ # stray did not advance to purging given time)
+ time.sleep(30)
+ self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024))
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0)
+
+ # See that before reintegration, the inode's backtrace points to a stray dir
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assertTrue(self.get_backtrace_path(ino).startswith("stray"))
+
+ last_reintegrated = self.get_mdc_stat("strays_reintegrated")
+
+ # Do a metadata operation on the remaining link (mv is heavy handed, but
+ # others like touch may be satisfied from caps without poking MDS)
+ self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"])
+
+ # Stray reintegration should happen as a result of the eval_remote call
+ # on responding to a client request.
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ # See the reintegration counter increment
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertGreater(curr_reintegrated, last_reintegrated)
+ last_reintegrated = curr_reintegrated
+
+ # Flush the journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that the backtrace for the file points to the remaining link's path
+ post_reint_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c")
+
+        # mds should reintegrate the stray when unlink finishes
+ self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"])
+ self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"])
+
+ # Stray reintegration should happen as a result of the notify_stray call
+ # on completion of unlink
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ # See the reintegration counter increment
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertGreater(curr_reintegrated, last_reintegrated)
+ last_reintegrated = curr_reintegrated
+
+ # Flush the journal
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # See that the backtrace for the file points to the newest link's path
+ post_reint_bt = self.fs.read_backtrace(ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d")
+
+ # Now really delete it
+ self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"])
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ self.assert_purge_idle()
+ self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024))
+
+ # We caused the inode to go stray 3 times
+ self.assertEqual(self.get_mdc_stat("strays_created"), 3)
+ # We purged it at the last
+ self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1)
+
+ def test_reintegration_via_scrub(self):
+ """
+ That reintegration is triggered via recursive scrub.
+ """
+
+ self.mount_a.run_shell_payload("""
+ mkdir -p a b
+ for i in `seq 1 50`; do
+ touch a/"$i"
+ ln a/"$i" b/"$i"
+ done
+ sync -f .
+ """)
+
+ self.mount_a.remount() # drop caps/cache
+ self.fs.rank_tell(["flush", "journal"])
+ self.fs.rank_fail()
+ self.fs.wait_for_daemons()
+
+ # only / in cache, reintegration cannot happen
+ self.wait_until_equal(
+ lambda: len(self.fs.rank_tell(["dump", "tree", "/"])),
+ expect_val=3,
+ timeout=60
+ )
+
+ last_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.mount_a.run_shell_payload("""
+ rm a/*
+ sync -f .
+ """)
+ self.wait_until_equal(
+ lambda: len(self.fs.rank_tell(["dump", "tree", "/"])),
+ expect_val=3,
+ timeout=60
+ )
+ self.assertEqual(self.get_mdc_stat("num_strays"), 50)
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ self.assertEqual(last_reintegrated, curr_reintegrated)
+
+ self.fs.rank_tell(["scrub", "start", "/", "recursive,force"])
+
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+ curr_reintegrated = self.get_mdc_stat("strays_reintegrated")
+ # N.B.: reintegrate (rename RPC) may be tried multiple times from different code paths
+ self.assertGreaterEqual(curr_reintegrated, last_reintegrated+50)
+
+ def test_mv_hardlink_cleanup(self):
+ """
+ That when doing a rename from A to B, and B has hardlinks,
+        a stray is created for B which is then reintegrated
+        into one of its hardlinks.
+ """
+ # Create file_a, file_b, and a hardlink to file_b
+ size_mb = 8
+ self.mount_a.write_n_mb("file_a", size_mb)
+ file_a_ino = self.mount_a.path_to_ino("file_a")
+
+ self.mount_a.write_n_mb("file_b", size_mb)
+ file_b_ino = self.mount_a.path_to_ino("file_b")
+
+ self.mount_a.run_shell(["ln", "file_b", "linkto_b"])
+ self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino)
+
+ # mv file_a file_b
+ self.mount_a.run_shell(["mv", "file_a", "file_b"])
+
+ # Stray reintegration should happen as a result of the notify_stray call on
+ # completion of rename
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays"),
+ expect_val=0,
+ timeout=60
+ )
+
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1)
+
+ # No data objects should have been deleted, as both files still have linkage.
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
+ self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024))
+
+ self.fs.mds_asok(['flush', 'journal'])
+
+ post_reint_bt = self.fs.read_backtrace(file_b_ino)
+ self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b")
+
+ def _setup_two_ranks(self):
+ # Set up two MDSs
+ self.fs.set_max_mds(2)
+
+ # See that we have two active MDSs
+ self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+ reject_fn=lambda v: v > 2 or v < 1)
+
+ active_mds_names = self.fs.get_active_names()
+ rank_0_id = active_mds_names[0]
+ rank_1_id = active_mds_names[1]
+ log.info("Ranks 0 and 1 are {0} and {1}".format(
+ rank_0_id, rank_1_id))
+
+ # Get rid of other MDS daemons so that it's easier to know which
+ # daemons to expect in which ranks after restarts
+ for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}:
+ self.mds_cluster.mds_stop(unneeded_mds)
+ self.mds_cluster.mds_fail(unneeded_mds)
+
+ return rank_0_id, rank_1_id
+
+ def _force_migrate(self, path, rank=1):
+ """
+        :param path: Filesystem path (string) to move
+        :param rank: MDS rank to pin the path to (default 1)
+ :return: None
+ """
+ self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", str(rank), path])
+ rpath = "/"+path
+ self._wait_subtrees([(rpath, rank)], rank=rank, path=rpath)
+
+ def _is_stopped(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank not in [i['rank'] for i in mds_map['info'].values()]
+
+ def test_purge_on_shutdown(self):
+ """
+ That when an MDS rank is shut down, its purge queue is
+ drained in the process.
+ """
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
+ self.mds_cluster.mds_fail_restart(rank_1_id)
+ self.fs.wait_for_daemons()
+
+ file_count = 5
+
+ self.mount_a.create_n_files("delete_me/file", file_count)
+
+ self._force_migrate("delete_me")
+
+ self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
+ self.mount_a.umount_wait()
+
+ # See all the strays go into purge queue
+ self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id)
+ self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id)
+ self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0)
+
+ # See nothing get purged from the purge queue (yet)
+ time.sleep(10)
+ self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
+
+ # Shut down rank 1
+ self.fs.set_max_mds(1)
+
+        # It shouldn't proceed past stopping because it's still not allowed
+ # to purge
+ time.sleep(10)
+ self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0)
+ self.assertFalse(self._is_stopped(1))
+
+ # Permit the daemon to start purging again
+ self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id),
+ 'injectargs',
+ "--mds_max_purge_files 100")
+
+ # It should now proceed through shutdown
+ self.fs.wait_for_daemons(timeout=120)
+
+ # ...and in the process purge all that data
+ self.await_data_pool_empty()
+
+ def test_migration_on_shutdown(self):
+ """
+ That when an MDS rank is shut down, any non-purgeable strays
+ get migrated to another rank.
+ """
+
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ # Create a non-purgeable stray in a ~mds1 stray directory
+ # by doing a hard link and deleting the original file
+ self.mount_a.run_shell_payload("""
+mkdir dir_1 dir_2
+touch dir_1/original
+ln dir_1/original dir_2/linkto
+""")
+
+ self._force_migrate("dir_1")
+ self._force_migrate("dir_2", rank=0)
+
+ # empty mds cache. otherwise mds reintegrates stray when unlink finishes
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(['flush', 'journal'], rank_1_id)
+ self.fs.mds_asok(['cache', 'drop'], rank_1_id)
+
+ self.mount_a.mount_wait()
+ self.mount_a.run_shell(["rm", "-f", "dir_1/original"])
+ self.mount_a.umount_wait()
+
+ self._wait_for_counter("mds_cache", "strays_created", 1,
+ mds_id=rank_1_id)
+
+ # Shut down rank 1
+ self.fs.set_max_mds(1)
+ self.fs.wait_for_daemons(timeout=120)
+
+ # See that the stray counter on rank 0 has incremented
+ self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1)
+
+ def test_migrate_unlinked_dir(self):
+ """
+ Reproduce https://tracker.ceph.com/issues/53597
+ """
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ self.mount_a.run_shell_payload("""
+mkdir pin
+touch pin/placeholder
+""")
+
+ self._force_migrate("pin")
+
+ # Hold the dir open so it cannot be purged
+ p = self.mount_a.open_dir_background("pin/to-be-unlinked")
+
+ # Unlink the dentry
+ self.mount_a.run_shell(["rmdir", "pin/to-be-unlinked"])
+
+ # Wait to see the stray count increment
+ self.wait_until_equal(
+ lambda: self.get_mdc_stat("num_strays", mds_id=rank_1_id),
+ expect_val=1, timeout=60, reject_fn=lambda x: x > 1)
+ # but not purged
+ self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_1_id), 1)
+ self.assertEqual(self.get_mdc_stat("strays_enqueued", mds_id=rank_1_id), 0)
+
+ # Test loading unlinked dir into cache
+ self.fs.mds_asok(['flush', 'journal'], rank_1_id)
+ self.fs.mds_asok(['cache', 'drop'], rank_1_id)
+
+ # Shut down rank 1
+ self.fs.set_max_mds(1)
+ self.fs.wait_for_daemons(timeout=120)
+ # Now the stray should be migrated to rank 0
+ # self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_0_id), 1)
+ # https://github.com/ceph/ceph/pull/44335#issuecomment-1125940158
+
+ self.mount_a.kill_background(p)
+
+ def assert_backtrace(self, ino, expected_path):
+ """
+ Assert that the backtrace in the data pool for an inode matches
+ an expected /foo/bar path.
+ """
+ expected_elements = expected_path.strip("/").split("/")
+ bt = self.fs.read_backtrace(ino)
+ actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']]))
+ self.assertListEqual(expected_elements, actual_elements)
+
+ def get_backtrace_path(self, ino):
+ bt = self.fs.read_backtrace(ino)
+ elements = reversed([dn['dname'] for dn in bt['ancestors']])
+ return "/".join(elements)
+
+ def assert_purge_idle(self):
+ """
+ Assert that the MDS perf counters indicate no strays exist and
+ no ongoing purge activity. Sanity check for when PurgeQueue should
+ be idle.
+ """
+ mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache']
+ pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue']
+ self.assertEqual(mdc_stats["num_strays"], 0)
+ self.assertEqual(mdc_stats["num_strays_delayed"], 0)
+ self.assertEqual(pq_stats["pq_executing"], 0)
+ self.assertEqual(pq_stats["pq_executing_ops"], 0)
+
+ def test_mv_cleanup(self):
+ """
+ That when doing a rename from A to B, and B has no hardlinks,
+        a stray is created for B and then purged.
+ """
+ # Create file_a and file_b, write some to both
+ size_mb = 8
+ self.mount_a.write_n_mb("file_a", size_mb)
+ file_a_ino = self.mount_a.path_to_ino("file_a")
+ self.mount_a.write_n_mb("file_b", size_mb)
+ file_b_ino = self.mount_a.path_to_ino("file_b")
+
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assert_backtrace(file_a_ino, "file_a")
+ self.assert_backtrace(file_b_ino, "file_b")
+
+ # mv file_a file_b
+ self.mount_a.run_shell(['mv', 'file_a', 'file_b'])
+
+ # See that stray counter increments
+ self.assertEqual(self.get_mdc_stat("strays_created"), 1)
+ # Wait for purge counter to increment
+ self._wait_for_counter("mds_cache", "strays_enqueued", 1)
+ self._wait_for_counter("purge_queue", "pq_executed", 1)
+
+ self.assert_purge_idle()
+
+ # file_b should have been purged
+ self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024))
+
+ # Backtrace should have updated from file_a to file_b
+ self.fs.mds_asok(['flush', 'journal'])
+ self.assert_backtrace(file_a_ino, "file_b")
+
+ # file_a's data should still exist
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024))
+
+ def _pool_df(self, pool_name):
+ """
+ Return a dict like
+ {
+ "kb_used": 0,
+ "bytes_used": 0,
+ "max_avail": 19630292406,
+ "objects": 0
+ }
+
+ :param pool_name: Which pool (must exist)
+ """
+ out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")
+ for p in json.loads(out)['pools']:
+ if p['name'] == pool_name:
+ return p['stats']
+
+ raise RuntimeError("Pool '{0}' not found".format(pool_name))
+
+ def await_data_pool_empty(self):
+ self.wait_until_true(
+ lambda: self._pool_df(
+ self.fs.get_data_pool_name()
+ )['objects'] == 0,
+ timeout=60)
+
+ def test_snapshot_remove(self):
+ """
+ That removal of a snapshot that references a now-unlinked file results
+        in purging of the stray for the file.
+ """
+ # Enable snapshots
+ self.fs.set_allow_new_snaps(True)
+
+ # Create a dir with a file in it
+ size_mb = 8
+ self.mount_a.run_shell(["mkdir", "snapdir"])
+ self.mount_a.run_shell(["mkdir", "snapdir/subdir"])
+ self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024)
+ file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a")
+
+ # Snapshot the dir
+ self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"])
+
+ # Cause the head revision to deviate from the snapshot
+ self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb)
+
+ # Flush the journal so that backtraces, dirfrag objects will actually be written
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Unlink the file
+ self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"])
+ self.mount_a.run_shell(["rmdir", "snapdir/subdir"])
+
+ # Unmount the client because when I come back to check the data is still
+ # in the file I don't want to just see what's in the page cache.
+ self.mount_a.umount_wait()
+
+ self.assertEqual(self.get_mdc_stat("strays_created"), 2)
+
+ # FIXME: at this stage we see a purge and the stray count drops to
+ # zero, but there's actually still a stray, so at the very
+ # least the StrayManager stats code is slightly off
+
+ self.mount_a.mount_wait()
+
+ # See that the data from the snapshotted revision of the file is still present
+ # and correct
+ self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024)
+
+ # Remove the snapshot
+ self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"])
+
+ # Purging file_a doesn't happen until after we've flushed the journal, because
+ # it is referenced by the snapshotted subdir, and the snapshot isn't really
+ # gone until the journal references to it are gone
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs.
+ # See also: http://tracker.ceph.com/issues/20072
+ self.wait_until_true(
+ lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024),
+ timeout=60
+ )
+
+ # See that a purge happens now
+ self._wait_for_counter("mds_cache", "strays_enqueued", 2)
+ self._wait_for_counter("purge_queue", "pq_executed", 2)
+
+ self.await_data_pool_empty()
+
+ def test_fancy_layout(self):
+ """
+ purge stray file with fancy layout
+ """
+
+ file_name = "fancy_layout_file"
+ self.mount_a.run_shell(["touch", file_name])
+
+ file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608"
+ self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout)
+
+ # 35MB requires 7 objects
+ size_mb = 35
+ self.mount_a.write_n_mb(file_name, size_mb)
+
+ self.mount_a.run_shell(["rm", "-f", file_name])
+ self.fs.mds_asok(["flush", "journal"])
+
+ # can't use self.fs.data_objects_absent here, it does not support fancy layout
+ self.await_data_pool_empty()
+
+ def test_dirfrag_limit(self):
+ """
+ That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations).
+ """
+
+ LOW_LIMIT = 50
+ self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT))
+ time.sleep(10) # for config to reach MDS; async create is fast!!
+
+ try:
+ self.mount_a.create_n_files("subdir/file", LOW_LIMIT+1, finaldirsync=True)
+ except CommandFailedError:
+ pass # ENOSPC
+ else:
+ self.fail("fragment size exceeded")
+
+
+ def test_dirfrag_limit_fragmented(self):
+ """
+ That fragmentation (forced) will allow more entries to be created.
+ """
+
+ LOW_LIMIT = 50
+ self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT))
+ self.config_set('mds', 'mds_bal_merge_size', 1) # disable merging
+ time.sleep(10) # for config to reach MDS; async create is fast!!
+
+ # Test that we can go beyond the limit if we fragment the directory
+ self.mount_a.create_n_files("subdir/file", LOW_LIMIT, finaldirsync=True)
+ self.mount_a.umount_wait() # release client caps
+
+ # Ensure that subdir is fragmented
+ self.fs.rank_asok(["dirfrag", "split", "/subdir", "0/0", "1"])
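+        # ("0/0" names the dirfrag being split and "1" is the number of split
+        # bits, i.e. the fragment is split into two.)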
+ self.fs.rank_asok(["flush", "journal"])
+
+ # Create 50% more files than the current fragment limit
+ self.mount_a.mount_wait()
+ self.mount_a.create_n_files("subdir/file", (LOW_LIMIT*3)//2, finaldirsync=True)
+
+ def test_dirfrag_limit_strays(self):
+ """
+ That unlinking fails when the stray directory fragment becomes too
+ large and that unlinking may continue once those strays are purged.
+ """
+
+ LOW_LIMIT = 10
+ # N.B. this test is inherently racy because stray removal may be faster
+ # than slow(er) file creation.
+ self.config_set('mds', 'mds_bal_fragment_size_max', LOW_LIMIT)
+ time.sleep(10) # for config to reach MDS; async create is fast!!
+
+ # Now test the stray directory size is limited and recovers
+ strays_before = self.get_mdc_stat("strays_created")
+ try:
+ # 10 stray directories: expect collisions
+ self.mount_a.create_n_files("subdir/file", LOW_LIMIT*10, finaldirsync=True, unlink=True)
+ except CommandFailedError:
+ pass # ENOSPC
+ else:
+ self.fail("fragment size exceeded")
+ strays_after = self.get_mdc_stat("strays_created")
+ self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT)
+
+ self._wait_for_counter("mds_cache", "strays_enqueued", strays_after)
+ self._wait_for_counter("purge_queue", "pq_executed", strays_after)
+
+ # verify new files can be created and unlinked
+ self.mount_a.create_n_files("subdir/file", LOW_LIMIT, dirsync=True, unlink=True)
+
+ def test_purge_queue_upgrade(self):
+ """
+ That when starting on a system with no purge queue in the metadata
+ pool, we silently create one.
+ """
+
+ self.mds_cluster.mds_stop()
+ self.mds_cluster.mds_fail()
+ self.fs.radosm(["rm", "500.00000000"])
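+        # 500.00000000 is the header object of the purge queue journal
+        # (inode 0x500 in the metadata pool); removing it simulates a
+        # cluster created before the purge queue existed.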
+ self.mds_cluster.mds_restart()
+ self.fs.wait_for_daemons()
+
+ def test_replicated_delete_speed(self):
+ """
+ That deletions of replicated metadata are not pathologically slow
+ """
+ rank_0_id, rank_1_id = self._setup_two_ranks()
+
+ self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0")
+ self.mds_cluster.mds_fail_restart(rank_1_id)
+ self.fs.wait_for_daemons()
+
+ file_count = 10
+
+ self.mount_a.create_n_files("delete_me/file", file_count)
+
+ self._force_migrate("delete_me")
+
+ begin = datetime.datetime.now()
+ self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")])
+ end = datetime.datetime.now()
+
+ # What we're really checking here is that we are completing client
+ # operations immediately rather than delaying until the next tick.
+ tick_period = float(self.fs.get_config("mds_tick_interval",
+ service_type="mds"))
+
+ duration = (end - begin).total_seconds()
+ self.assertLess(duration, (file_count * tick_period) * 0.25)
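+        # For example, with the default mds_tick_interval of 5 seconds and 10
+        # files, this asserts the deletions finish within 12.5 seconds.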
diff --git a/qa/tasks/cephfs/test_subvolume.py b/qa/tasks/cephfs/test_subvolume.py
new file mode 100644
index 000000000..1ebb137dd
--- /dev/null
+++ b/qa/tasks/cephfs/test_subvolume.py
@@ -0,0 +1,170 @@
+import logging
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+
+class TestSubvolume(CephFSTestCase):
+ CLIENTS_REQUIRED = 1
+ MDSS_REQUIRED = 1
+
+ def setUp(self):
+ super().setUp()
+ self.setup_test()
+
+ def tearDown(self):
+ # clean up
+ self.cleanup_test()
+ super().tearDown()
+
+ def setup_test(self):
+ self.mount_a.run_shell(['mkdir', 'group'])
+ self.mount_a.run_shell(['mkdir', 'group/subvol1'])
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol1'])
+ self.mount_a.run_shell(['mv', 'group/subvol1', 'group/subvol2'])
+
+ def cleanup_test(self):
+ self.mount_a.run_shell(['rm', '-rf', 'group'])
+
+ def test_subvolume_move_out_file(self):
+ """
+        To verify that a file can't be moved out of a subvolume
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # file can't be moved out of a subvolume
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['rename', 'group/subvol2/file1',
+ 'group/file1', 'group/subvol2/file1'])
+
+
+ def test_subvolume_move_in_file(self):
+ """
+        To verify that a file can't be moved into a subvolume
+ """
+ # file can't be moved into a subvolume
+ self.mount_a.run_shell(['touch', 'group/file2'])
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['rename', 'group/file2',
+ 'group/subvol2/file2', 'group/file2'])
+
+ def test_subvolume_hardlink_to_outside(self):
+ """
+        To verify that a file can't be hardlinked to a path outside its subvolume
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # create hard link within subvolume
+ self.mount_a.run_shell(['ln',
+ 'group/subvol2/file1', 'group/subvol2/file1_'])
+
+ # hard link can't be created out of subvolume
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['ln',
+ 'group/subvol2/file1', 'group/file1_'])
+
+ def test_subvolume_hardlink_to_inside(self):
+ """
+        To verify that a file outside a subvolume can't be hardlinked into the subvolume
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # create hard link within subvolume
+ self.mount_a.run_shell(['ln',
+ 'group/subvol2/file1', 'group/subvol2/file1_'])
+
+ # hard link can't be created inside subvolume
+ self.mount_a.run_shell(['touch', 'group/file2'])
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['ln',
+ 'group/file2', 'group/subvol2/file2_'])
+
+ def test_subvolume_snapshot_inside_subvolume_subdir(self):
+ """
+        To verify that a snapshot can't be taken in a subdirectory of a subvolume
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # create snapshot at subvolume root
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/.snap/s1'])
+
+ # can't create snapshot in a descendent dir of subvolume
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir'])
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2'])
+
+ # clean up
+ self.mount_a.run_shell(['rmdir', 'group/subvol2/.snap/s1'])
+
+ def test_subvolume_file_move_across_subvolumes(self):
+ """
+        To verify that a file can't be moved across subvolumes
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # create another subvol
+ self.mount_a.run_shell(['mkdir', 'group/subvol3'])
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol3'])
+
+ # can't move file across subvolumes
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['rename', 'group/subvol2/file1',
+ 'group/subvol3/file1',
+ 'group/subvol2/file1'])
+
+ def test_subvolume_hardlink_across_subvolumes(self):
+ """
+        To verify that a hardlink can't be created across subvolumes
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # create another subvol
+ self.mount_a.run_shell(['mkdir', 'group/subvol3'])
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol3'])
+
+ # can't create hard link across subvolumes
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['ln', 'group/subvol2/file1',
+ 'group/subvol3/file1'])
+
+ def test_subvolume_create_subvolume_inside_subvolume(self):
+ """
+        To verify that a subvolume can't be created inside another subvolume
+ """
+ # can't create subvolume inside a subvolume
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir'])
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol2/dir'])
+
+ def test_subvolume_create_snapshot_inside_new_subvolume_parent(self):
+ """
+ To verify that a snapshot can't be created in a subdirectory once its parent is marked as a subvolume
+ """
+ self.mount_a.run_shell(['touch', 'group/subvol2/file1'])
+
+ # clear subvolume flag
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '0', 'group/subvol2'])
+
+ # create a snap
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir'])
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2'])
+
+ # override subdir subvolume with parent subvolume
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol2/dir'])
+ self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume',
+ '-v', '1', 'group/subvol2'])
+
+ # can't create a snap in a subdir of a subvol parent
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s3'])
+
+ # clean up
+ self.mount_a.run_shell(['rmdir', 'group/subvol2/dir/.snap/s2'])
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
new file mode 100644
index 000000000..2ecfeb327
--- /dev/null
+++ b/qa/tasks/cephfs/test_volumes.py
@@ -0,0 +1,7946 @@
+import os
+import json
+import time
+import errno
+import random
+import logging
+import collections
+import uuid
+import unittest
+from hashlib import md5
+from textwrap import dedent
+from io import StringIO
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.fuse_mount import FuseMount
+from teuthology.exceptions import CommandFailedError
+
+log = logging.getLogger(__name__)
+
+class TestVolumesHelper(CephFSTestCase):
+ """Helper class for testing FS volume, subvolume group and subvolume operations."""
+ TEST_VOLUME_PREFIX = "volume"
+ TEST_SUBVOLUME_PREFIX="subvolume"
+ TEST_GROUP_PREFIX="group"
+ TEST_SNAPSHOT_PREFIX="snapshot"
+ TEST_CLONE_PREFIX="clone"
+ TEST_FILE_NAME_PREFIX="subvolume_file"
+
+ # for filling subvolume with data
+ CLIENTS_REQUIRED = 2
+ MDSS_REQUIRED = 2
+
+ # io defaults
+ DEFAULT_FILE_SIZE = 1 # MB
+ DEFAULT_NUMBER_OF_FILES = 1024
+
+ def _fs_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)
+
+ def _raw_cmd(self, *args):
+ return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args)
+
+ def __check_clone_state(self, state, clone, clone_group=None, timo=120):
+ check = 0
+ args = ["clone", "status", self.volname, clone]
+ if clone_group:
+ args.append(clone_group)
+ args = tuple(args)
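+ # poll 'fs clone status' once per second until the expected state is reached or 'timo' seconds elapse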
+ while check < timo:
+ result = json.loads(self._fs_cmd(*args))
+ if result["status"]["state"] == state:
+ break
+ check += 1
+ time.sleep(1)
+ self.assertTrue(check < timo)
+
+ def _get_clone_status(self, clone, clone_group=None):
+ args = ["clone", "status", self.volname, clone]
+ if clone_group:
+ args.append(clone_group)
+ args = tuple(args)
+ result = json.loads(self._fs_cmd(*args))
+ return result
+
+ def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120):
+ self.__check_clone_state("complete", clone, clone_group, timo)
+
+ def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120):
+ self.__check_clone_state("failed", clone, clone_group, timo)
+
+ def _wait_for_clone_to_be_in_progress(self, clone, clone_group=None, timo=120):
+ self.__check_clone_state("in-progress", clone, clone_group, timo)
+
+ def _check_clone_canceled(self, clone, clone_group=None):
+ self.__check_clone_state("canceled", clone, clone_group, timo=1)
+
+ def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version):
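+ # v2 subvolumes keep snapshots beside the UUID directory (<base>/.snap/<snap>/<uuid>); v1 subvolumes keep them at the subvolume root (<subvol>/.snap/<snap>)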
+ if source_version == 2:
+ # v2
+ if subvol_path is not None:
+ (base_path, uuid_str) = os.path.split(subvol_path)
+ else:
+ (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group))
+ return os.path.join(base_path, ".snap", snapshot, uuid_str)
+
+ # v1
+ base_path = self._get_subvolume_path(self.volname, subvolume, group_name=source_group)
+ return os.path.join(base_path, ".snap", snapshot)
+
+ def _verify_clone_attrs(self, source_path, clone_path):
+ path1 = source_path
+ path2 = clone_path
+
+ p = self.mount_a.run_shell(["find", path1])
+ paths = p.stdout.getvalue().strip().split()
+
+ # for each entry in source and clone (sink) verify certain inode attributes:
+ # inode type, mode, ownership, [am]time.
+ for source_path in paths:
+ sink_entry = source_path[len(path1)+1:]
+ sink_path = os.path.join(path2, sink_entry)
+
+ # mode+type
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%f', source_path]).stdout.getvalue().strip(), 16)
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%f', sink_path]).stdout.getvalue().strip(), 16)
+ self.assertEqual(sval, cval)
+
+ # ownership
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%u', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%u', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%g', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%g', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ # inode timestamps
+ # do not check access as kclient will generally not update this like ceph-fuse will.
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%Y', source_path]).stdout.getvalue().strip())
+ cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip())
+ self.assertEqual(sval, cval)
+
+ def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool):
+ # verifies following clone root attrs quota, data_pool and pool_namespace
+ # remaining attributes of clone root are validated in _verify_clone_attrs
+
+ clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group))
+
+ # verify quota is inherited from source snapshot
+ src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes")
+ # FIXME: kclient fails to get this quota value: https://tracker.ceph.com/issues/48075
+ if isinstance(self.mount_a, FuseMount):
+ self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota))
+
+ if clone_pool:
+ # verify pool is set as per request
+ self.assertEqual(clone_info["data_pool"], clone_pool)
+ else:
+ # verify pool and pool namespace are inherited from snapshot
+ self.assertEqual(clone_info["data_pool"],
+ self.mount_a.getfattr(source_path, "ceph.dir.layout.pool"))
+ self.assertEqual(clone_info["pool_namespace"],
+ self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace"))
+
+ def _verify_clone(self, subvolume, snapshot, clone,
+ source_group=None, clone_group=None, clone_pool=None,
+ subvol_path=None, source_version=2, timo=120):
+ # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed
+ # but snapshots are retained for clone verification
+ path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version)
+ path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group)
+
+ check = 0
+ # TODO: currently snapshot rentries are not stable if snapshot source entries
+ # are removed, https://tracker.ceph.com/issues/46747
+ while check < timo and subvol_path is None:
+ val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries"))
+ val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries"))
+ if val1 == val2:
+ break
+ check += 1
+ time.sleep(1)
+ self.assertTrue(check < timo)
+
+ self._verify_clone_root(path1, path2, clone, clone_group, clone_pool)
+ self._verify_clone_attrs(path1, path2)
+
+ def _generate_random_volume_name(self, count=1):
+ n = self.volume_start
+ volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.volume_start += count
+ return volumes[0] if count == 1 else volumes
+
+ def _generate_random_subvolume_name(self, count=1):
+ n = self.subvolume_start
+ subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.subvolume_start += count
+ return subvolumes[0] if count == 1 else subvolumes
+
+ def _generate_random_group_name(self, count=1):
+ n = self.group_start
+ groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.group_start += count
+ return groups[0] if count == 1 else groups
+
+ def _generate_random_snapshot_name(self, count=1):
+ n = self.snapshot_start
+ snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.snapshot_start += count
+ return snaps[0] if count == 1 else snaps
+
+ def _generate_random_clone_name(self, count=1):
+ n = self.clone_start
+ clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)]
+ self.clone_start += count
+ return clones[0] if count == 1 else clones
+
+ def _enable_multi_fs(self):
+ self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it")
+
+ def _create_or_reuse_test_volume(self):
+ result = json.loads(self._fs_cmd("volume", "ls"))
+ if len(result) == 0:
+ self.vol_created = True
+ self.volname = self._generate_random_volume_name()
+ self._fs_cmd("volume", "create", self.volname)
+ else:
+ self.volname = result[0]['name']
+
+ def _get_volume_info(self, vol_name, human_readable=False):
+ if human_readable:
+ args = ["volume", "info", vol_name, human_readable]
+ else:
+ args = ["volume", "info", vol_name]
+ args = tuple(args)
+ vol_md = self._fs_cmd(*args)
+ return vol_md
+
+ def _get_subvolume_group_path(self, vol_name, group_name):
+ args = ("subvolumegroup", "getpath", vol_name, group_name)
+ path = self._fs_cmd(*args)
+ # remove the leading '/', and trailing whitespaces
+ return path[1:].rstrip()
+
+ def _get_subvolume_group_info(self, vol_name, group_name):
+ args = ["subvolumegroup", "info", vol_name, group_name]
+ args = tuple(args)
+ group_md = self._fs_cmd(*args)
+ return group_md
+
+ def _get_subvolume_path(self, vol_name, subvol_name, group_name=None):
+ args = ["subvolume", "getpath", vol_name, subvol_name]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ path = self._fs_cmd(*args)
+ # remove the leading '/', and trailing whitespaces
+ return path[1:].rstrip()
+
+ def _get_subvolume_info(self, vol_name, subvol_name, group_name=None):
+ args = ["subvolume", "info", vol_name, subvol_name]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ subvol_md = self._fs_cmd(*args)
+ return subvol_md
+
+ def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None):
+ args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname]
+ if group_name:
+ args.append(group_name)
+ args = tuple(args)
+ snap_md = self._fs_cmd(*args)
+ return snap_md
+
+ def _delete_test_volume(self):
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ if pool is not None:
+ self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool, sudo=True)
+
+ if pool_namespace is not None:
+ self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', pool_namespace, sudo=True)
+
+ def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ # mode
+ self.mount_a.run_shell(['sudo', 'chmod', mode, subvolpath], omit_sudo=False)
+
+ # ownership
+ self.mount_a.run_shell(['sudo', 'chown', uid, subvolpath], omit_sudo=False)
+ self.mount_a.run_shell(['sudo', 'chgrp', gid, subvolpath], omit_sudo=False)
+
+ def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None,
+ number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE):
+ # get subvolume path for IO
+ args = ["subvolume", "getpath", self.volname, subvolume]
+ if subvolume_group:
+ args.append(subvolume_group)
+ args = tuple(args)
+ subvolpath = self._fs_cmd(*args)
+ self.assertNotEqual(subvolpath, None)
+ subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline
+
+ io_path = subvolpath
+ if create_dir:
+ io_path = os.path.join(subvolpath, create_dir)
+ self.mount_a.run_shell_payload(f"mkdir -p {io_path}")
+
+ log.debug("filling subvolume {0} with {1} files each {2}MB size under directory {3}".format(subvolume, number_of_files, file_size, io_path))
+ for i in range(number_of_files):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ self.mount_a.write_n_mb(os.path.join(io_path, filename), file_size)
+
+ def _do_subvolume_io_mixed(self, subvolume, subvolume_group=None):
+ subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group)
+
+ reg_file = "regfile.0"
+ dir_path = os.path.join(subvolpath, "dir.0")
+ sym_path1 = os.path.join(subvolpath, "sym.0")
+ # this symlink's ownership would be changed
+ sym_path2 = os.path.join(dir_path, "sym.0")
+
+ self.mount_a.run_shell(["mkdir", dir_path])
+ self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path1])
+ self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path2])
+ # flip ownership to nobody. assumption: nobody's id is 65534
+ self.mount_a.run_shell(["sudo", "chown", "-h", "65534:65534", sym_path2], omit_sudo=False)
+
+ def _wait_for_trash_empty(self, timeout=60):
+ # XXX: construct the trash dir path (note that there is no mgr
+ # [sub]volume interface for this).
+ trashdir = os.path.join("./", "volumes", "_deleting")
+ self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout)
+
+ def _wait_for_subvol_trash_empty(self, subvol, group="_nogroup", timeout=30):
+ trashdir = os.path.join("./", "volumes", group, subvol, ".trash")
+ try:
+ self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ pass
+ else:
+ raise
+
+ def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False):
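+ # legacy subvolumes track their metadata in a path-hash named file under volumes/_legacy; v1/v2 subvolumes keep a .meta file in their base directory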
+ if legacy:
+ subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group)
+ m = md5()
+ m.update(("/"+subvol_path).encode('utf-8'))
+ meta_filename = "{0}.meta".format(m.digest().hex())
+ metapath = os.path.join(".", "volumes", "_legacy", meta_filename)
+ else:
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ metapath = os.path.join(".", "volumes", group, subvol_name, ".meta")
+
+ out = self.mount_a.run_shell(['sudo', 'cat', metapath], omit_sudo=False)
+ lines = out.stdout.getvalue().strip().split('\n')
+ sv_version = -1
+ for line in lines:
+ if line == "version = " + str(version):
+ sv_version = version
+ break
+ self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format(
+ version, sv_version, metapath))
+
+ def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'):
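+ # hand-craft the on-disk layout of a v1 subvolume: volumes/<group>/<subvol>/<uuid>, with a .meta file at the subvolume base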
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ basepath = os.path.join("volumes", group, subvol_name)
+ uuid_str = str(uuid.uuid4())
+ createpath = os.path.join(basepath, uuid_str)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # create a v1 snapshot, to prevent auto upgrades
+ if has_snapshot:
+ snappath = os.path.join(createpath, ".snap", "fake")
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', snappath], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
+
+ # create a v1 .meta file
+ meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state)
+ if state == 'pending':
+ # add a fake clone source
+ meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n'
+ meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta")
+ self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True)
+ return createpath
+
+ def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True):
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name)
+ if create:
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', trashpath], omit_sudo=False)
+ else:
+ self.mount_a.run_shell(['sudo', 'rmdir', trashpath], omit_sudo=False)
+
+ def _configure_guest_auth(self, guest_mount, authid, key):
+ """
+ Set up auth credentials for a guest client.
+ """
+ # Create keyring file for the guest client.
+ keyring_txt = dedent("""
+ [client.{authid}]
+ key = {key}
+
+ """.format(authid=authid,key=key))
+
+ guest_mount.client_id = authid
+ guest_mount.client_remote.write_file(guest_mount.get_keyring_path(),
+ keyring_txt, sudo=True)
+ # Add a guest client section to the ceph config file.
+ self.config_set("client.{0}".format(authid), "debug client", 20)
+ self.config_set("client.{0}".format(authid), "debug objecter", 20)
+ self.set_conf("client.{0}".format(authid),
+ "keyring", guest_mount.get_keyring_path())
+
+ def _auth_metadata_get(self, filedata):
+ """
+ Return a deserialized JSON object, or None
+ """
+ try:
+ data = json.loads(filedata)
+ except json.decoder.JSONDecodeError:
+ data = None
+ return data
+
+ def setUp(self):
+ super(TestVolumesHelper, self).setUp()
+ self.volname = None
+ self.vol_created = False
+ self._enable_multi_fs()
+ self._create_or_reuse_test_volume()
+ self.config_set('mon', 'mon_allow_pool_delete', True)
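+ # start the name counters at random offsets so generated names are unlikely to collide across tests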
+ self.volume_start = random.randint(1, (1<<20))
+ self.subvolume_start = random.randint(1, (1<<20))
+ self.group_start = random.randint(1, (1<<20))
+ self.snapshot_start = random.randint(1, (1<<20))
+ self.clone_start = random.randint(1, (1<<20))
+
+ def tearDown(self):
+ if self.vol_created:
+ self._delete_test_volume()
+ super(TestVolumesHelper, self).tearDown()
+
+
+class TestVolumes(TestVolumesHelper):
+ """Tests for FS volume operations."""
+ def test_volume_create(self):
+ """
+ That the volume can be created and then cleaned up
+ """
+ volname = self._generate_random_volume_name()
+ self._fs_cmd("volume", "create", volname)
+ volumels = json.loads(self._fs_cmd("volume", "ls"))
+
+ if not (volname in ([volume['name'] for volume in volumels])):
+ raise RuntimeError("Error creating volume '{0}'".format(volname))
+
+ # check that the pools were created with the correct config
+ pool_details = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json"))
+ pool_flags = {}
+ for pool in pool_details:
+ pool_flags[pool["pool_id"]] = pool["flags_names"].split(",")
+
+ volume_details = json.loads(self._fs_cmd("get", volname, "--format=json"))
+ for data_pool_id in volume_details['mdsmap']['data_pools']:
+ self.assertIn("bulk", pool_flags[data_pool_id])
+ meta_pool_id = volume_details['mdsmap']['metadata_pool']
+ self.assertNotIn("bulk", pool_flags[meta_pool_id])
+
+ # clean up
+ self._fs_cmd("volume", "rm", volname, "--yes-i-really-mean-it")
+
+ def test_volume_ls(self):
+ """
+ That the existing and the newly created volumes can be listed and
+ finally cleaned up.
+ """
+ vls = json.loads(self._fs_cmd("volume", "ls"))
+ volumes = [volume['name'] for volume in vls]
+
+ # create new volumes and add them to the existing list of volumes
+ volumenames = self._generate_random_volume_name(2)
+ for volumename in volumenames:
+ self._fs_cmd("volume", "create", volumename)
+ volumes.extend(volumenames)
+
+ # list volumes
+ try:
+ volumels = json.loads(self._fs_cmd('volume', 'ls'))
+ if len(volumels) == 0:
+ raise RuntimeError("Expected the 'fs volume ls' command to list the created volumes.")
+ else:
+ volnames = [volume['name'] for volume in volumels]
+ if collections.Counter(volnames) != collections.Counter(volumes):
+ raise RuntimeError("Error creating or listing volumes")
+ finally:
+ # clean up
+ for volume in volumenames:
+ self._fs_cmd("volume", "rm", volume, "--yes-i-really-mean-it")
+
+ def test_volume_rm(self):
+ """
+ That the volume can only be removed when --yes-i-really-mean-it is used,
+ and that the deleted volume is no longer listed.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ try:
+ self._fs_cmd("volume", "rm", self.volname)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EPERM:
+ raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, "
+ "but it failed with {0}".format(ce.exitstatus))
+ else:
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if it's gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ if (self.volname in [volume['name'] for volume in volumes]):
+ raise RuntimeError("Expected the 'fs volume rm' command to succeed. "
+ "The volume {0} not removed.".format(self.volname))
+ else:
+ raise RuntimeError("expected the 'fs volume rm' command to fail.")
+
+ def test_volume_rm_arbitrary_pool_removal(self):
+ """
+ That the arbitrary pool added to the volume out of band is removed
+ successfully on volume removal.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ new_pool = "new_pool"
+ # add arbitrary data pool
+ self.fs.add_data_pool(new_pool)
+ vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty"))
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if fs is gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ volnames = [volume['name'] for volume in volumes]
+ self.assertNotIn(self.volname, volnames)
+
+ #check if osd pools are gone
+ pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty"))
+ for pool in vol_status["pools"]:
+ self.assertNotIn(pool["name"], pools)
+
+ def test_volume_rm_when_mon_delete_pool_false(self):
+ """
+ That the volume can only be removed when mon_allow_pool_delete is set
+ to true, and that the pools are removed after volume deletion.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ self.config_set('mon', 'mon_allow_pool_delete', False)
+ try:
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "expected the 'fs volume rm' command to fail with EPERM, "
+ "but it failed with {0}".format(ce.exitstatus))
+ vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty"))
+ self.config_set('mon', 'mon_allow_pool_delete', True)
+ self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it")
+
+ #check if fs is gone
+ volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty"))
+ volnames = [volume['name'] for volume in volumes]
+ self.assertNotIn(self.volname, volnames,
+ "volume {0} exists after removal".format(self.volname))
+ #check if pools are gone
+ pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty"))
+ for pool in vol_status["pools"]:
+ self.assertNotIn(pool["name"], pools,
+ "pool {0} exists after volume removal".format(pool["name"]))
+
+ def test_volume_rename(self):
+ """
+ That a volume, its file system, and its pools can be renamed.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ oldvolname = self.volname
+ newvolname = self._generate_random_volume_name()
+ new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta"
+ self._fs_cmd("volume", "rename", oldvolname, newvolname,
+ "--yes-i-really-mean-it")
+ volumels = json.loads(self._fs_cmd('volume', 'ls'))
+ volnames = [volume['name'] for volume in volumels]
+ # volume name changed
+ self.assertIn(newvolname, volnames)
+ self.assertNotIn(oldvolname, volnames)
+ # pool names changed
+ self.fs.get_pool_names(refresh=True)
+ self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name())
+ self.assertEqual(new_data_pool, self.fs.get_data_pool_name())
+
+ def test_volume_rename_idempotency(self):
+ """
+ That volume rename is idempotent.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ oldvolname = self.volname
+ newvolname = self._generate_random_volume_name()
+ new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta"
+ self._fs_cmd("volume", "rename", oldvolname, newvolname,
+ "--yes-i-really-mean-it")
+ self._fs_cmd("volume", "rename", oldvolname, newvolname,
+ "--yes-i-really-mean-it")
+ volumels = json.loads(self._fs_cmd('volume', 'ls'))
+ volnames = [volume['name'] for volume in volumels]
+ self.assertIn(newvolname, volnames)
+ self.assertNotIn(oldvolname, volnames)
+ self.fs.get_pool_names(refresh=True)
+ self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name())
+ self.assertEqual(new_data_pool, self.fs.get_data_pool_name())
+
+ def test_volume_rename_fails_without_confirmation_flag(self):
+ """
+ That renaming volume fails without --yes-i-really-mean-it flag.
+ """
+ newvolname = self._generate_random_volume_name()
+ try:
+ self._fs_cmd("volume", "rename", self.volname, newvolname)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "invalid error code on renaming a FS volume without the "
+ "'--yes-i-really-mean-it' flag")
+ else:
+ self.fail("expected renaming of FS volume to fail without the "
+ "'--yes-i-really-mean-it' flag")
+
+ def test_volume_rename_for_more_than_one_data_pool(self):
+ """
+ That renaming a volume with more than one data pool does not change
+ the name of the data pools.
+ """
+ for m in self.mounts:
+ m.umount_wait()
+ self.fs.add_data_pool('another-data-pool')
+ oldvolname = self.volname
+ newvolname = self._generate_random_volume_name()
+ self.fs.get_pool_names(refresh=True)
+ orig_data_pool_names = list(self.fs.data_pools.values())
+ new_metadata_pool = f"cephfs.{newvolname}.meta"
+ self._fs_cmd("volume", "rename", self.volname, newvolname,
+ "--yes-i-really-mean-it")
+ volumels = json.loads(self._fs_cmd('volume', 'ls'))
+ volnames = [volume['name'] for volume in volumels]
+ # volume name changed
+ self.assertIn(newvolname, volnames)
+ self.assertNotIn(oldvolname, volnames)
+ self.fs.get_pool_names(refresh=True)
+ # metadata pool name changed
+ self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name())
+ # data pool names unchanged
+ self.assertCountEqual(orig_data_pool_names, list(self.fs.data_pools.values()))
+
+ def test_volume_info(self):
+ """
+ Tests the 'fs volume info' command
+ """
+ vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"]
+ group = self._generate_random_group_name()
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ # get volume metadata
+ vol_info = json.loads(self._get_volume_info(self.volname))
+ for md in vol_fields:
+ self.assertIn(md, vol_info,
+ f"'{md}' key not present in metadata of volume")
+ self.assertEqual(vol_info["used_size"], 0,
+ "Size should be zero when volumes directory is empty")
+
+ def test_volume_info_pending_subvol_deletions(self):
+ """
+ Tests the pending_subvolume_deletions in 'fs volume info' command
+ """
+ subvolname = self._generate_random_subvolume_name()
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777")
+ # create 3K zero byte files
+ self._do_subvolume_io(subvolname, number_of_files=3000, file_size=0)
+ # Delete the subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+ # get volume metadata
+ vol_info = json.loads(self._get_volume_info(self.volname))
+ self.assertNotEqual(vol_info['pending_subvolume_deletions'], 0,
+ "pending_subvolume_deletions should be 1")
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_volume_info_without_subvolumegroup(self):
+ """
+ Tests the 'fs volume info' command without subvolume group
+ """
+ vol_fields = ["pools", "mon_addrs"]
+ # get volume metadata
+ vol_info = json.loads(self._get_volume_info(self.volname))
+ for md in vol_fields:
+ self.assertIn(md, vol_info,
+ f"'{md}' key not present in metadata of volume")
+ self.assertNotIn("used_size", vol_info,
+ "'used_size' should not be present in absence of subvolumegroup")
+ self.assertNotIn("pending_subvolume_deletions", vol_info,
+ "'pending_subvolume_deletions' should not be present in absence"
+ " of subvolumegroup")
+
+ def test_volume_info_with_human_readable_flag(self):
+ """
+ Tests the 'fs volume info --human_readable' command
+ """
+ vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"]
+ group = self._generate_random_group_name()
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ # get volume metadata
+ vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable"))
+ for md in vol_fields:
+ self.assertIn(md, vol_info,
+ f"'{md}' key not present in metadata of volume")
+ units = [' ', 'k', 'M', 'G', 'T', 'P', 'E']
+ assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent"
+ assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent"
+ assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent"
+ assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent"
+ assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent"
+ self.assertEqual(int(vol_info["used_size"]), 0,
+ "Size should be zero when volumes directory is empty")
+
+ def test_volume_info_with_human_readable_flag_without_subvolumegroup(self):
+ """
+ Tests the 'fs volume info --human_readable' command without subvolume group
+ """
+ vol_fields = ["pools", "mon_addrs"]
+ # get volume metadata
+ vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable"))
+ for md in vol_fields:
+ self.assertIn(md, vol_info,
+ f"'{md}' key not present in metadata of volume")
+ units = [' ', 'k', 'M', 'G', 'T', 'P', 'E']
+ assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent"
+ assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent"
+ assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent"
+ assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent"
+ self.assertNotIn("used_size", vol_info,
+ "'used_size' should not be present in absence of subvolumegroup")
+ self.assertNotIn("pending_subvolume_deletions", vol_info,
+ "'pending_subvolume_deletions' should not be present in absence"
+ " of subvolumegroup")
+
+
+class TestSubvolumeGroups(TestVolumesHelper):
+ """Tests for FS subvolume group operations."""
+ def test_default_uid_gid_subvolume_group(self):
+ group = self._generate_random_group_name()
+ expected_uid = 0
+ expected_gid = 0
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ group_path = self._get_subvolume_group_path(self.volname, group)
+
+ # check group's uid and gid
+ stat = self.mount_a.stat(group_path)
+ self.assertEqual(stat['st_uid'], expected_uid)
+ self.assertEqual(stat['st_gid'], expected_gid)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_nonexistent_subvolume_group_create(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = "non_existent_group"
+
+ # try creating a subvolume in a nonexistent group
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume create' command to fail")
+
+ def test_nonexistent_subvolume_group_rm(self):
+ group = "non_existent_group"
+
+ # try removing a nonexistent subvolume group
+ try:
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail")
+
+ def test_subvolume_group_create_with_auto_cleanup_on_fail(self):
+ group = self._generate_random_group_name()
+ data_pool = "invalid_pool"
+ # create group with invalid data pool layout
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool)
+
+ # check whether group path is cleaned up
+ try:
+ self._fs_cmd("subvolumegroup", "getpath", self.volname, group)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail")
+
+ def test_subvolume_group_create_with_desired_data_pool_layout(self):
+ group1, group2 = self._generate_random_group_name(2)
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group1)
+ group1_path = self._get_subvolume_group_path(self.volname, group1)
+
+ default_pool = self.mount_a.getfattr(group1_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+
+ # add data pool
+ newid = self.fs.add_data_pool(new_pool)
+
+ # create group specifying the new data pool as its pool layout
+ self._fs_cmd("subvolumegroup", "create", self.volname, group2,
+ "--pool_layout", new_pool)
+ group2_path = self._get_subvolume_group_path(self.volname, group2)
+
+ desired_pool = self.mount_a.getfattr(group2_path, "ceph.dir.layout.pool")
+ try:
+ self.assertEqual(desired_pool, new_pool)
+ except AssertionError:
+ self.assertEqual(int(desired_pool), newid) # old kernel returns id
+
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group1)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group2)
+
+ def test_subvolume_group_create_with_desired_mode(self):
+ group1, group2 = self._generate_random_group_name(2)
+ # default mode
+ expected_mode1 = "755"
+ # desired mode
+ expected_mode2 = "777"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group2, f"--mode={expected_mode2}")
+ self._fs_cmd("subvolumegroup", "create", self.volname, group1)
+
+ group1_path = self._get_subvolume_group_path(self.volname, group1)
+ group2_path = self._get_subvolume_group_path(self.volname, group2)
+ volumes_path = os.path.dirname(group1_path)
+
+ # check group's mode
+ actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group1_path]).stdout.getvalue().strip()
+ actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', group2_path]).stdout.getvalue().strip()
+ actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode1, expected_mode1)
+ self.assertEqual(actual_mode2, expected_mode2)
+ self.assertEqual(actual_mode3, expected_mode1)
+
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group1)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group2)
+
+ def test_subvolume_group_create_with_desired_uid_gid(self):
+ """
+ That the subvolume group can be created with the desired uid and gid and its uid and gid matches the
+ expected values.
+ """
+ uid = 1000
+ gid = 1000
+
+ # create subvolume group
+ subvolgroupname = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid))
+
+ # make sure it exists
+ subvolgrouppath = self._get_subvolume_group_path(self.volname, subvolgroupname)
+ self.assertNotEqual(subvolgrouppath, None)
+
+ # verify the uid and gid
+ suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolgrouppath]).stdout.getvalue().strip())
+ sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolgrouppath]).stdout.getvalue().strip())
+ self.assertEqual(uid, suid)
+ self.assertEqual(gid, sgid)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname)
+
+ def test_subvolume_group_create_with_invalid_data_pool_layout(self):
+ group = self._generate_random_group_name()
+ data_pool = "invalid_pool"
+ # create group with invalid data pool layout
+ try:
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup create' command to fail")
+
+ def test_subvolume_group_create_with_size(self):
+ # create group with size -- should set quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")
+
+ # get group metadata
+ group_info = json.loads(self._get_subvolume_group_info(self.volname, group))
+ self.assertEqual(group_info["bytes_quota"], 1000000000)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_info(self):
+ # tests the 'fs subvolumegroup info' command
+
+ group_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"]
+
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # get group metadata
+ group_info = json.loads(self._get_subvolume_group_info(self.volname, group))
+ for md in group_md:
+ self.assertIn(md, group_info, "'{0}' key not present in metadata of group".format(md))
+
+ self.assertEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set")
+ self.assertEqual(group_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set")
+ self.assertEqual(group_info["uid"], 0)
+ self.assertEqual(group_info["gid"], 0)
+
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize))
+
+ # get group metadata after quota set
+ group_info = json.loads(self._get_subvolume_group_info(self.volname, group))
+ for md in group_md:
+ self.assertIn(md, group_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertNotEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is set")
+ self.assertEqual(group_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize))
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_idempotence(self):
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # try creating w/ same subvolume group name -- should be idempotent
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_idempotence_mode(self):
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # try creating w/ same subvolume group name with mode -- should set mode
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=766")
+
+ group_path = self._get_subvolume_group_path(self.volname, group)
+
+ # check subvolumegroup's mode
+ mode = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip()
+ self.assertEqual(mode, "766")
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_idempotence_uid_gid(self):
+ desired_uid = 1000
+ desired_gid = 1000
+
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # try creating w/ same subvolume group name with uid/gid -- should set uid/gid
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--uid", str(desired_uid), "--gid", str(desired_gid))
+
+ group_path = self._get_subvolume_group_path(self.volname, group)
+
+ # verify the uid and gid
+ actual_uid = int(self.mount_a.run_shell(['stat', '-c' '%u', group_path]).stdout.getvalue().strip())
+ actual_gid = int(self.mount_a.run_shell(['stat', '-c' '%g', group_path]).stdout.getvalue().strip())
+ self.assertEqual(desired_uid, actual_uid)
+ self.assertEqual(desired_gid, actual_gid)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_idempotence_data_pool(self):
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ group_path = self._get_subvolume_group_path(self.volname, group)
+
+ default_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+
+ # add data pool
+ newid = self.fs.add_data_pool(new_pool)
+
+ # try creating w/ same subvolume group name with new data pool -- should set pool
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", new_pool)
+ desired_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool")
+ try:
+ self.assertEqual(desired_pool, new_pool)
+ except AssertionError:
+ self.assertEqual(int(desired_pool), newid) # old kernel returns id
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_create_idempotence_resize(self):
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # try creating w/ same subvolume group name with size -- should set quota
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")
+
+ # get group metadata
+ group_info = json.loads(self._get_subvolume_group_info(self.volname, group))
+ self.assertEqual(group_info["bytes_quota"], 1000000000)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_quota_mds_path_restriction_to_group_path(self):
+ """
+ Tests subvolumegroup quota enforcement with mds path restriction set to group.
+ For quota to be enforced, read permission needs to be provided to the parent
+ of the directory on which quota is set. Please see the tracker comment [1]
+ [1] https://tracker.ceph.com/issues/55090#note-8
+ """
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*100
+ # create group with 100MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # Create auth_id
+ authid = "client.guest1"
+ user = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", authid,
+ "mds", "allow rw path=/volumes",
+ "mgr", "allow rw",
+ "osd", "allow rw tag cephfs *=*",
+ "mon", "allow r",
+ "--format=json-pretty"
+ ))
+
+ # Prepare guest_mount with new authid
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, "guest1", user[0]["key"])
+
+ # mount the subvolume
+ mount_path = os.path.join("/", subvolpath)
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+
+ # create 99 files of 1MB
+ guest_mount.run_shell_payload("mkdir -p dir1")
+ for i in range(99):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE)
+ try:
+ # write two more 1MB files to exceed the quota
+ guest_mount.run_shell_payload("mkdir -p dir2")
+ for i in range(2):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE)
+ # For quota to be enforced
+ time.sleep(60)
+ # create 400 files of 1MB to exceed quota
+ for i in range(400):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE)
+ # Sometimes quota enforcement takes time.
+ if i == 200:
+ time.sleep(60)
+ except CommandFailedError:
+ pass
+ else:
+ self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail")
+
+ # clean up
+ guest_mount.umount_wait()
+
+ # Delete the subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_mds_path_restriction_to_subvolume_path(self):
+ """
+ Tests subvolumegroup quota enforcement with mds path restriction set to subvolume path
+ The quota should not be enforced because of the fourth limitation mentioned at
+ https://docs.ceph.com/en/latest/cephfs/quota/#limitations
+ """
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*100
+ # create group with 100MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ mount_path = os.path.join("/", subvolpath)
+
+ # Create auth_id
+ authid = "client.guest1"
+ user = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", authid,
+ "mds", f"allow rw path={mount_path}",
+ "mgr", "allow rw",
+ "osd", "allow rw tag cephfs *=*",
+ "mon", "allow r",
+ "--format=json-pretty"
+ ))
+
+ # Prepare guest_mount with new authid
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, "guest1", user[0]["key"])
+
+ # mount the subvolume
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+
+ # create 99 files of 1MB each (just under the 100MB group quota)
+ guest_mount.run_shell_payload("mkdir -p dir1")
+ for i in range(99):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE)
+ try:
+ # write two more 1MB files to exceed the quota
+ guest_mount.run_shell_payload("mkdir -p dir2")
+ for i in range(2):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE)
+ # For quota to be enforced
+ time.sleep(60)
+ # create 400 files of 1MB to exceed quota
+ for i in range(400):
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i)
+ guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE)
+ # Sometimes quota enforcement takes time.
+ if i == 200:
+ time.sleep(60)
+ except CommandFailedError:
+ self.fail(f"Quota should not be enforced, expected filling subvolume {subvolname} with 400 files of size 1MB to succeed")
+
+ # clean up
+ guest_mount.umount_wait()
+
+ # Delete the subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_exceeded_subvolume_removal(self):
+ """
+ Tests subvolume removal when its group quota is exceeded
+ """
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*100
+ # create group with 100MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create 99 files of 1MB each (just under the 100MB group quota)
+ self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99)
+
+ try:
+ # write two more 1MB files to exceed the quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2)
+ # For quota to be enforced
+ time.sleep(20)
+ # create 400 files of 1MB to exceed quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=400)
+ except CommandFailedError:
+ # Delete subvolume when group quota is exceeded
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ else:
+ self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail")
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_exceeded_subvolume_removal_retained_snaps(self):
+ """
+ Tests retained-snapshot subvolume removal when its group quota is exceeded
+ """
+ group = self._generate_random_group_name()
+ subvolname = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*100
+ # create group with 100MB quota
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create 99 files of 1MB each (just under the 100MB group quota)
+ self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot1, "--group_name", group)
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot2, "--group_name", group)
+
+ try:
+ # write two more 1MB files to exceed the quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2)
+ # For quota to be enforced
+ time.sleep(20)
+ # create 400 files of 1MB to exceed quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=400)
+ except CommandFailedError:
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group, "--retain-snapshots")
+ # remove snapshot1
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot1, "--group_name", group)
+ # remove snapshot2 (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot2, "--group_name", group)
+ # verify subvolume trash is clean
+ self._wait_for_subvol_trash_empty(subvolname, group=group)
+ else:
+ self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail")
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_subvolume_removal(self):
+ """
+ Tests subvolume removal when its group quota is set.
+ """
+ # create group with size -- should set quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # remove subvolume
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set")
+
+ # remove subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_legacy_subvolume_removal(self):
+ """
+ Tests legacy subvolume removal when its group quota is set.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume -- in a custom group
+ createpath1 = os.path.join(".", "volumes", group, subvolume)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False)
+
+ # this would auto-upgrade on access without anyone noticing
+ subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, "--group-name", group)
+ self.assertNotEqual(subvolpath1, None)
+ subvolpath1 = subvolpath1.rstrip() # strip any trailing newline (the leading "/" is kept for the comparison below)
+
+ # and... the subvolume path returned should be what we created behind the scene
+ self.assertEqual(createpath1[1:], subvolpath1)
+
+ # Set subvolumegroup quota on idempotent subvolumegroup creation
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")
+
+ # remove subvolume
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set")
+
+ # remove subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_quota_v1_subvolume_removal(self):
+ """
+ Tests v1 subvolume removal when its group quota is set.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate a v1 subvolume -- in a custom group
+ self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False)
+
+ # Set subvolumegroup quota on idempotent subvolumegroup creation
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")
+
+ # remove subvolume
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set")
+
+ # remove subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_resize_fail_invalid_size(self):
+ """
+ That a subvolume group cannot be resized to an invalid size, and that the quota does not change
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create group with 1MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize))
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # try to resize the subvolume with an invalid size -10
+ nsize = -10
+ try:
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on resize of subvolume group with invalid size")
+ else:
+ self.fail("expected the 'fs subvolumegroup resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_resize_fail_zero_size(self):
+ """
+ That a subvolume group cannot be resized to a zero size, and that the quota does not change
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create group with 1MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize))
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # try to resize the subvolume group with size 0
+ nsize = 0
+ try:
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL,
+ "invalid error code on resize of subvolume group with invalid size")
+ else:
+ self.fail("expected the 'fs subvolumegroup resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_resize_quota_lt_used_size(self):
+ """
+ That a subvolume group can be resized to a size smaller than the current used size
+ and the resulting quota matches the expected size.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create group with 20MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
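+ # ceph.dir.rbytes reports the recursive byte usage of the directory tree; it is used here
+ # as the current used size when choosing a shrink target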
+ usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes"))
+
+ # shrink the subvolume group
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize))
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolumegroup resize' command to succeed")
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume and group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_resize_fail_quota_lt_used_size_no_shrink(self):
+ """
+ That a subvolume group cannot be resized to a size smaller than the current used size
+ when --no_shrink is given, and that the quota does not change.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create group with 20MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ usedsize = int(self.mount_a.getfattr(grouppath, "ceph.dir.rbytes"))
+
+ # shrink the subvolume group
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize), "--no_shrink")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolumegroup with quota less than used")
+ else:
+ self.fail("expected the 'fs subvolumegroup resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume and group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_resize_expand_on_full_subvolume(self):
+ """
+ That a subvolume group can be expanded after it becomes full, and that future writes then succeed
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*100
+ # create group with 100MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create 99 files of 1MB
+ self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99)
+
+ try:
+ # write two more 1MB files to exceed the quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2)
+ # quota enforcement on CephFS is cooperative and not instantaneous; give it time to kick in
+ time.sleep(20)
+ # create 500 files of 1MB
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500)
+ except CommandFailedError:
+ # Not able to write. So expand the subvolumegroup more and try writing the files again
+ nsize = osize*7
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize))
+ try:
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with 500 files of size 1MB "
+ "to succeed".format(subvolname))
+ else:
+ self.fail("expected filling subvolume {0} with 500 files of size 1MB "
+ "to fail".format(subvolname))
+
+ # remove subvolume and group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_resize_infinite_size(self):
+ """
+ That a subvolume group can be resized to an infinite size by unsetting its quota.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize))
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # resize inf
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf")
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # remove subvolume group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_group_resize_infinite_size_future_writes(self):
+ """
+ That a subvolume group can be resized to an infinite size and the future writes succeed.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*5
+ # create group with 5MB quota
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group,
+ "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ grouppath = self._get_subvolume_group_path(self.volname, group)
+ self.assertNotEqual(grouppath, None)
+
+ # create subvolume under the group
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname,
+ "--group_name", group, "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group)
+ self.assertNotEqual(subvolpath, None)
+
+ # create 4 files of 1MB
+ self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=4)
+
+ try:
+ # write two more 1MB files to exceed the quota
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2)
+ # quota enforcement on CephFS is cooperative and not instantaneous; give it time to kick in
+ time.sleep(20)
+ # create 500 files of 1MB
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500)
+ except CommandFailedError:
+ # Not able to write. So resize subvolumegroup to 'inf' and try writing the files again
+ # resize inf
+ self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf")
+ try:
+ self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with 500 files of size 1MB "
+ "to succeed".format(subvolname))
+ else:
+ self.fail("expected filling subvolume {0} with 500 files of size 1MB "
+ "to fail".format(subvolname))
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # remove subvolume and group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_ls(self):
+ # tests the 'fs subvolumegroup ls' command
+
+ subvolumegroups = []
+
+ # create subvolumegroups
+ subvolumegroups = self._generate_random_group_name(3)
+ for groupname in subvolumegroups:
+ self._fs_cmd("subvolumegroup", "create", self.volname, groupname)
+
+ subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ if len(subvolumegroupls) == 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups")
+ else:
+ subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls]
+ if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups):
+ raise RuntimeError("Error creating or listing subvolume groups")
+
+ def test_subvolume_group_ls_filter(self):
+ # tests that the 'fs subvolumegroup ls' command filters out the '_deleting' directory
+
+ subvolumegroups = []
+
+ # create subvolumegroups
+ subvolumegroups = self._generate_random_group_name(3)
+ for groupname in subvolumegroups:
+ self._fs_cmd("subvolumegroup", "create", self.volname, groupname)
+
+ # create subvolume and remove. This creates '_deleting' directory.
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls]
+ if "_deleting" in subvolgroupnames:
+ self.fail("Listing subvolume groups listed '_deleting' directory")
+
+ def test_subvolume_group_ls_filter_internal_directories(self):
+ # tests that the 'fs subvolumegroup ls' command filters out internal directories,
+ # e.g. '_deleting', '_nogroup', '_index', '_legacy'
+
+ subvolumegroups = self._generate_random_group_name(3)
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolumegroups
+ for groupname in subvolumegroups:
+ self._fs_cmd("subvolumegroup", "create", self.volname, groupname)
+
+ # create subvolume which will create '_nogroup' directory
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # create snapshot
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # clone snapshot which will create '_index' directory
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # wait for clone to complete
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume which will create '_deleting' directory
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # list subvolumegroups
+ ret = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ self.assertEqual(len(ret), len(subvolumegroups))
+
+ ret_list = [subvolumegroup['name'] for subvolumegroup in ret]
+ self.assertEqual(len(ret_list), len(subvolumegroups))
+
+ self.assertEqual(all(elem in subvolumegroups for elem in ret_list), True)
+
+ # cleanup
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+ for groupname in subvolumegroups:
+ self._fs_cmd("subvolumegroup", "rm", self.volname, groupname)
+
+ def test_subvolume_group_ls_for_nonexistent_volume(self):
+ # tests the 'fs subvolumegroup ls' command when the /volumes directory doesn't exist
+ # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created
+
+ # list subvolume groups
+ subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname))
+ if len(subvolumegroupls) > 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list")
+
+ def test_subvolumegroup_pin_distributed(self):
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+ self.config_set('mds', 'mds_export_ephemeral_distributed', True)
+
+ group = "pinme"
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True")
+ subvolumes = self._generate_random_subvolume_name(50)
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+ self._wait_distributed_subtrees(2 * 2, status=status, rank="all")
+
+ # remove subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_group_rm_force(self):
+ # test removing non-existing subvolume group with --force
+ group = self._generate_random_group_name()
+ try:
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolumegroup rm --force' command to succeed")
+
+ def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self):
+ """Test the presence of any subvolumegroup when only subvolumegroup is present"""
+
+ group = self._generate_random_group_name()
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "subvolumegroup exists")
+ # delete subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolumegroup exists")
+
+ def test_subvolume_group_exists_with_no_subvolumegroup_and_subvolume(self):
+ """Test the presence of any subvolumegroup when no subvolumegroup is present"""
+
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolumegroup exists")
+
+ def test_subvolume_group_exists_with_subvolumegroup_and_subvolume(self):
+ """Test the presence of any subvolume when subvolumegroup
+ and subvolume both are present"""
+
+ group = self._generate_random_group_name()
+ subvolume = self._generate_random_subvolume_name(2)
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume[0], "--group_name", group)
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume[1])
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "subvolumegroup exists")
+ # delete subvolume in group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume[0], "--group_name", group)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "subvolumegroup exists")
+ # delete subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume[1])
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "subvolumegroup exists")
+ # delete subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolumegroup exists")
+
+ def test_subvolume_group_exists_without_subvolumegroup_and_with_subvolume(self):
+ """Test the presence of any subvolume when subvolume is present
+ but no subvolumegroup is present"""
+
+ subvolume = self._generate_random_subvolume_name()
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolumegroup exists")
+ # delete subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolumegroup exists")
+
+
+class TestSubvolumes(TestVolumesHelper):
+ """Tests for FS subvolume operations, except snapshot and snapshot clone."""
+ def test_async_subvolume_rm(self):
+ subvolumes = self._generate_random_subvolume_name(100)
+
+ # create subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+ self._do_subvolume_io(subvolume, number_of_files=10)
+
+ self.mount_a.umount_wait()
+
+ # remove subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ self.mount_a.mount_wait()
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty(timeout=300)
+
+ def test_default_uid_gid_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ expected_uid = 0
+ expected_gid = 0
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # check subvolume's uid and gid
+ stat = self.mount_a.stat(subvol_path)
+ self.assertEqual(stat['st_uid'], expected_uid)
+ self.assertEqual(stat['st_gid'], expected_gid)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_nonexistent_subvolume_rm(self):
+ # remove non-existing subvolume
+ subvolume = "non_existent_subvolume"
+
+ # try, remove subvolume
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume rm' command to fail")
+
+ def test_subvolume_create_and_rm(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # make sure it exists
+ subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ self.assertNotEqual(subvolpath, None)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ # make sure it's gone
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume getpath' command to fail. Subvolume not removed.")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_and_rm_in_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_create_idempotence(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # try creating w/ same subvolume name -- should be idempotent
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_idempotence_resize(self):
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # try creating w/ same subvolume name with size -- should set quota
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "1000000000")
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ self.assertEqual(subvol_info["bytes_quota"], 1000000000)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_idempotence_mode(self):
+ # default mode
+ default_mode = "755"
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode_1, default_mode)
+
+ # try creating w/ same subvolume name with --mode 777
+ new_mode = "777"
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", new_mode)
+
+ actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode_2, new_mode)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_idempotence_without_passing_mode(self):
+ # create subvolume
+ desired_mode = "777"
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode)
+
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode_1, desired_mode)
+
+ # default mode
+ default_mode = "755"
+
+ # try creating w/ same subvolume name without passing --mode argument
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode_2, default_mode)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_isolated_namespace(self):
+ """
+ Create subvolume in separate rados namespace
+ """
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated")
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ self.assertNotEqual(len(subvol_info), 0)
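+ # --namespace-isolated places the subvolume's objects in a dedicated RADOS namespace within the
+ # data pool; the namespace is expected to follow the "fsvolumens_<subvolume>" convention checked below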
+ self.assertEqual(subvol_info["pool_namespace"], "fsvolumens_" + subvolume)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_auto_cleanup_on_fail(self):
+ subvolume = self._generate_random_subvolume_name()
+ data_pool = "invalid_pool"
+ # create subvolume with invalid data pool layout fails
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+
+ # check whether subvol path is cleaned up
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume")
+ else:
+ self.fail("expected the 'fs subvolume getpath' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_desired_data_pool_layout_in_group(self):
+ subvol1, subvol2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # create group. this also helps set default pool layout for subvolumes
+ # created within the group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+ subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group)
+
+ default_pool = self.mount_a.getfattr(subvol1_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+
+ # add data pool
+ newid = self.fs.add_data_pool(new_pool)
+
+ # create subvolume specifying the new data pool as its pool layout
+ self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group,
+ "--pool_layout", new_pool)
+ subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group)
+
+ desired_pool = self.mount_a.getfattr(subvol2_path, "ceph.dir.layout.pool")
+ try:
+ self.assertEqual(desired_pool, new_pool)
+ except AssertionError:
+ self.assertEqual(int(desired_pool), newid) # old kernel returns id
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_desired_mode(self):
+ subvol1 = self._generate_random_subvolume_name()
+
+ # default mode
+ default_mode = "755"
+ # desired mode
+ desired_mode = "777"
+
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--mode", "777")
+
+ subvol1_path = self._get_subvolume_path(self.volname, subvol1)
+
+ # check subvolumegroup's mode
+ subvol_par_path = os.path.dirname(subvol1_path)
+ group_path = os.path.dirname(subvol_par_path)
+ actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode1, default_mode)
+ # check /volumes mode
+ volumes_path = os.path.dirname(group_path)
+ actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode2, default_mode)
+ # check subvolume's mode
+ actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode3, desired_mode)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_desired_mode_in_group(self):
+ subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+
+ group = self._generate_random_group_name()
+ # default mode
+ expected_mode1 = "755"
+ # desired mode
+ expected_mode2 = "777"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, "--mode", "777")
+ # check whether mode 0777 also works
+ self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group, "--mode", "0777")
+
+ subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group)
+ subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group)
+ subvol3_path = self._get_subvolume_path(self.volname, subvol3, group_name=group)
+
+ # check subvolume's mode
+ actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip()
+ actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol2_path]).stdout.getvalue().strip()
+ actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol3_path]).stdout.getvalue().strip()
+ self.assertEqual(actual_mode1, expected_mode1)
+ self.assertEqual(actual_mode2, expected_mode2)
+ self.assertEqual(actual_mode3, expected_mode2)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_desired_uid_gid(self):
+ """
+ That the subvolume can be created with the desired uid and gid and its uid and gid matches the
+ expected values.
+ """
+ uid = 1000
+ gid = 1000
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify the uid and gid
+ suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolpath]).stdout.getvalue().strip())
+ sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolpath]).stdout.getvalue().strip())
+ self.assertEqual(uid, suid)
+ self.assertEqual(gid, sgid)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_invalid_data_pool_layout(self):
+ subvolume = self._generate_random_subvolume_name()
+ data_pool = "invalid_pool"
+ # create subvolume with invalid data pool layout
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout")
+ else:
+ self.fail("expected the 'fs subvolume create' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_with_invalid_size(self):
+ # create subvolume with an invalid size -1
+ subvolume = self._generate_random_subvolume_name()
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume create' command to fail")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_create_and_ls_providing_group_as_nogroup(self):
+ """
+ That 'subvolume create' and 'subvolume ls' throw a permission denied
+ error if the option --group_name=_nogroup is provided.
+ """
+
+ subvolname = self._generate_random_subvolume_name()
+
+ # try to create subvolume providing --group_name=_nogroup option
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", "_nogroup")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM)
+ else:
+ self.fail("expected the 'fs subvolume create' command to fail")
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolname)
+
+ # try to list subvolumes providing --group_name=_nogroup option
+ try:
+ self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM)
+ else:
+ self.fail("expected the 'fs subvolume ls' command to fail")
+
+ # list subvolumes
+ self._fs_cmd("subvolume", "ls", self.volname)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_expand(self):
+ """
+ That a subvolume can be expanded in size and its quota matches the expected size.
+ """
+
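+ # Roughly the CLI flow exercised here (placeholders illustrative):
+ #   ceph fs subvolume create <vol> <subvol> --size <bytes>
+ #   ceph fs subvolume resize <vol> <subvol> <new_size_in_bytes>
+ #   getfattr -n ceph.quota.max_bytes <subvol_path>   # verify the quota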
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # expand the subvolume
+ nsize = osize*2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_info(self):
+ # tests the 'fs subvolume info' command
+
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid", "features", "state"]
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # get subvolume metadata
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set")
+ self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set")
+ self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty")
+ self.assertEqual(subvol_info["state"], "complete", "expected state to be complete")
+
+ self.assertEqual(len(subvol_info["features"]), 3,
+ msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+
+ # get subvolume metadata after quota set
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set")
+ self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize))
+ self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume")
+ self.assertEqual(subvol_info["state"], "complete", "expected state to be complete")
+
+ self.assertEqual(len(subvol_info["features"]), 3,
+ msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_ls(self):
+ # tests the 'fs subvolume ls' command
+
+ subvolumes = []
+
+ # create subvolumes
+ subvolumes = self._generate_random_subvolume_name(3)
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # list subvolumes
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ if len(subvolumels) == 0:
+ self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.")
+ else:
+ subvolnames = [subvolume['name'] for subvolume in subvolumels]
+ if collections.Counter(subvolnames) != collections.Counter(subvolumes):
+ self.fail("Error creating or listing subvolumes")
+
+ # remove subvolumes
+ for subvolume in subvolumes:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_ls_with_groupname_as_internal_directory(self):
+ # tests the 'fs subvolume ls' command when the group name passed is one of the internal
+ # directories, e.g. '_nogroup', '_legacy', '_deleting', '_index'.
+ # Expect 'fs subvolume ls' to fail with errno EINVAL for '_legacy', '_deleting' and '_index',
+ # and with errno EPERM for '_nogroup'
+
+ # try to list subvolumes providing --group_name=_nogroup option
+ try:
+ self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM)
+ else:
+ self.fail("expected the 'fs subvolume ls' command to fail with error 'EPERM' for _nogroup")
+
+ # try to list subvolumes providing --group_name=_legacy option
+ try:
+ self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_legacy")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL)
+ else:
+ self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _legacy")
+
+ # try to list subvolumes providing --group_name=_deleting option
+ try:
+ self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_deleting")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL)
+ else:
+ self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _deleting")
+
+ # try to list subvolumes providing --group_name=_index option
+ try:
+ self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_index")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL)
+ else:
+ self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _index")
+
+ def test_subvolume_ls_for_notexistent_default_group(self):
+ # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist
+ # prerequisite: we expect that the volume is created and the default group _nogroup is
+ # NOT created (i.e. a subvolume without group is not created)
+
+ # list subvolumes
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ if len(subvolumels) > 0:
+ raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.")
+
+ def test_subvolume_marked(self):
+ """
+ ensure a subvolume is marked with the ceph.dir.subvolume xattr
+ """
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # getpath
+ subvolpath = self._get_subvolume_path(self.volname, subvolume)
+
+ # subdirectory of a subvolume cannot be moved outside the subvolume once marked with
+ # the xattr ceph.dir.subvolume, hence test by attempting to rename subvol path (incarnation)
+ # outside the subvolume
+ dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location')
+ srcpath = os.path.join(self.mount_a.mountpoint, subvolpath)
+ rename_script = dedent("""
+ import os
+ import errno
+ try:
+ os.rename("{src}", "{dst}")
+ except OSError as e:
+ if e.errno != errno.EXDEV:
+ raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory")
+ else:
+ raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail")
+ """)
+ self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath), sudo=True)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_pin_export(self):
+ self.fs.set_max_mds(2)
+ status = self.fs.wait_for_daemons()
+
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1")
+ path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ path = os.path.dirname(path) # get subvolume path
+
+ self._get_subtrees(status=status, rank=1)
+ self._wait_subtrees([(path, 1)], status=status)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ ### authorize operations
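+ # These tests exercise per-subvolume client authorization; roughly the CLI involved
+ # (placeholders illustrative):
+ #   ceph fs subvolume authorize <vol> <subvol> <auth_id> [--group_name <grp>] [--tenant_id <id>] [--access_level r|rw]
+ #   ceph fs subvolume deauthorize <vol> <subvol> <auth_id> [--group_name <grp>]
+ #   ceph fs subvolume authorized_list <vol> <subvol> [--group_name <grp>]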
+
+ def test_authorize_deauthorize_legacy_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid = "alice"
+
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # emulate an old-fashioned subvolume in a custom group
+ createpath = os.path.join(".", "volumes", group, subvolume)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
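+ # a plain directory under /volumes/<group> with a pool layout set is what pre-mgr/volumes
+ # deployments created; the volumes plugin treats such paths as legacy subvolumes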
+
+ mount_path = os.path.join("/", "volumes", group, subvolume)
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id")
+
+ # guest authID should exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertIn("client.{0}".format(authid), existing_ids)
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, authid, key)
+
+ # mount the subvolume, and write to it
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+ guest_mount.write_n_mb("data.bin", 1)
+
+ # authorize guest authID read access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r")
+
+ # the guest client only sees the change to read-only access after a
+ # remount of the subvolume.
+ guest_mount.umount_wait()
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+
+ # read existing content of the subvolume
+ self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
+ # cannot write into read-only subvolume
+ with self.assertRaises(CommandFailedError):
+ guest_mount.write_n_mb("rogue.bin", 1)
+
+ # cleanup
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid,
+ "--group_name", group)
+ # guest authID should no longer exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertNotIn("client.{0}".format(authid), existing_ids)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_deauthorize_subvolume(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid = "alice"
+
+ guest_mount = self.mount_b
+ guest_mount.umount_wait()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=777")
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+ mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume,
+ "--group_name", group).rstrip()
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id")
+
+ # guest authID should exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertIn("client.{0}".format(authid), existing_ids)
+
+ # configure credentials for guest client
+ self._configure_guest_auth(guest_mount, authid, key)
+
+ # mount the subvolume, and write to it
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+ guest_mount.write_n_mb("data.bin", 1)
+
+ # authorize guest authID read access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid,
+ "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r")
+
+ # the guest client only sees the change to read-only access after a
+ # remount of the subvolume.
+ guest_mount.umount_wait()
+ guest_mount.mount_wait(cephfs_mntpt=mount_path)
+
+ # read existing content of the subvolume
+ self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"])
+ # cannot write into read-only subvolume
+ with self.assertRaises(CommandFailedError):
+ guest_mount.write_n_mb("rogue.bin", 1)
+
+ # cleanup
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid,
+ "--group_name", group)
+ # guest authID should no longer exist
+ existing_ids = [a['entity'] for a in self.auth_list()]
+ self.assertNotIn("client.{0}".format(authid), existing_ids)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_multitenant_subvolumes(self):
+ """
+ That subvolume access can be restricted to a tenant.
+
+ That metadata used to enforce tenant isolation of
+ subvolumes is stored as a two-way mapping between auth
+ IDs and subvolumes that they're authorized to access.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ guest_mount = self.mount_b
+
+ # Guest clients belonging to different tenants, but using the same
+ # auth ID.
+ auth_id = "alice"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+ guestclient_2 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant2",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Check that subvolume metadata file is created on subvolume creation.
+ subvol_metadata_filename = "_{0}:{1}.meta".format(group, subvolume)
+ self.assertIn(subvol_metadata_filename, guest_mount.ls("volumes"))
+
+ # Authorize 'guestclient_1', using auth ID 'alice' and belonging to
+ # 'tenant1', with 'rw' access to the volume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that the auth metadata file for auth ID 'alice' is
+ # created on authorizing 'alice' access to the subvolume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Verify that the auth metadata file stores the tenant ID that the
+ # auth ID belongs to, the auth ID's authorized access levels
+ # for different subvolumes, versioning details, etc.
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # Verify that the subvolume metadata file stores info about auth IDs
+ # and their access levels to the subvolume, versioning details, etc.
+ expected_subvol_metadata = {
+ "version": 1,
+ "compat_version": 1,
+ "auths": {
+ "alice": {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+ subvol_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(subvol_metadata_filename)))
+
+ self.assertGreaterEqual(subvol_metadata["version"], expected_subvol_metadata["version"])
+ del expected_subvol_metadata["version"]
+ del subvol_metadata["version"]
+ self.assertEqual(expected_subvol_metadata, subvol_metadata)
+
+ # Cannot authorize 'guestclient_2' to access the volume.
+ # It uses auth ID 'alice', which has already been used by a
+ # 'guestclient_1' belonging to another tenant for accessing
+ # the volume.
+
+ try:
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_2["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_2["tenant_id"])
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "Invalid error code returned on authorize of subvolume with same auth_id but different tenant_id")
+ else:
+ self.fail("expected the 'fs subvolume authorize' command to fail")
+
+ # Check that auth metadata file is cleaned up on removing
+ # auth ID's only access to a volume.
+
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id,
+ "--group_name", group)
+ self.assertNotIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Check that subvolume metadata file is cleaned up on subvolume deletion.
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self.assertNotIn(subvol_metadata_filename, guest_mount.ls("volumes"))
+
+ # clean up
+ guest_mount.umount_wait()
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_authorized_list(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ authid1 = "alice"
+ authid2 = "guest1"
+ authid3 = "guest2"
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # authorize alice authID read-write access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid1,
+ "--group_name", group)
+ # authorize guest1 authID read-write access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid2,
+ "--group_name", group)
+ # authorize guest2 authID read access to subvolume
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid3,
+ "--group_name", group, "--access_level", "r")
+
+ # list authorized-ids of the subvolume
+ expected_auth_list = [{'alice': 'rw'}, {'guest1': 'rw'}, {'guest2': 'r'}]
+ auth_list = json.loads(self._fs_cmd('subvolume', 'authorized_list', self.volname, subvolume, "--group_name", group))
+ self.assertCountEqual(expected_auth_list, auth_list)
+
+ # cleanup
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid1,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid2,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid3,
+ "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_auth_id_not_created_by_mgr_volumes(self):
+ """
+ If the auth_id already exists and was not created by the mgr/volumes plugin,
+ authorizing that auth_id is not allowed by default.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ try:
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EPERM,
+ "Invalid error code returned on authorize of subvolume for auth_id created out of band")
+ else:
+ self.fail("expected the 'fs subvolume authorize' command to fail")
+
+ # clean up
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_authorize_allow_existing_id_option(self):
+ """
+ If the auth_id already exists and was not created by mgr/volumes,
+ authorizing that auth_id is not allowed by default, but it is
+ allowed with the --allow-existing-id option.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # Create auth_id
+ self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "get-or-create", "client.guest1",
+ "mds", "allow *",
+ "osd", "allow rw",
+ "mon", "allow *"
+ )
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # By default, authorizing 'guestclient_1' would fail because its auth_id already
+ # exists and was not created by mgr/volumes; the '--allow-existing-id' option permits it.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"], "--allow-existing-id")
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id,
+ "--group_name", group)
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_deauthorize_auth_id_after_out_of_band_update(self):
+ """
+ If an auth_id authorized by the mgr/volumes plugin is updated
+ out of band, deauthorize should not delete the auth_id; it should
+ only remove the caps associated with the subvolume.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ subvol_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume,
+ "--group_name", group).rstrip()
+
+ # Update caps for guestclient_1 out of band
+ out = self.fs.mon_manager.raw_cluster_cmd(
+ "auth", "caps", "client.guest1",
+ "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path),
+ "osd", "allow rw pool=cephfs_data",
+ "mon", "allow r",
+ "mgr", "allow *"
+ )
+
+ # Deauthorize guestclient_1
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group)
+
+ # Validate the caps of guestclient_1 after deauthorize. The client entity should not have
+ # been deleted, and the mgr and mds caps updated out of band should still be present.
+ out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty"))
+
+ self.assertEqual("client.guest1", out[0]["entity"])
+ self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"])
+ self.assertEqual("allow *", out[0]["caps"]["mgr"])
+ self.assertNotIn("osd", out[0]["caps"])
+
+ # clean up
+ out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_recover_auth_metadata_during_authorize(self):
+ """
+ That the auth metadata manager can recover from partial auth updates using
+ metadata files, which store auth info along with its update status. This
+ test validates the recovery during authorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that the auth metadata file for auth ID 'guest1' is
+ # created on authorizing 'guest1' access to the subvolume.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+ expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run authorize again.
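+ # (Illustrative note: judging by the expected_auth_metadata structures used in
+ # later tests, the metadata file is assumed to be JSON whose top-level and
+ # per-subvolume entries carry a "dirty" flag. Flipping every "false" to "true"
+ # with sed marks those entries dirty, emulating an update interrupted midway.)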
+ guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False)
+
+ # Authorize 'guestclient_1' to access the subvolume.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertEqual(auth_metadata_content, expected_auth_metadata_content)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_recover_auth_metadata_during_deauthorize(self):
+ """
+ That the auth metadata manager can recover from partial auth updates using
+ metadata files, which store auth info along with its update status. This
+ test validates the recovery during deauthorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ guestclient_1 = {
+ "auth_id": "guest1",
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that the auth metadata file for auth ID 'guest1' is
+ # created on authorizing 'guest1' access to the subvolume1.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+ expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ # Authorize 'guestclient_1' to access the subvolume2.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Induce partial auth update state by modifying the auth metadata file,
+ # and then run de-authorize.
+ guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False)
+
+ # Deauthorize 'guestclient_1' to access the subvolume2.
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group)
+
+ auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename)))
+ self.assertEqual(auth_metadata_content, expected_auth_metadata_content)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_update_old_style_auth_metadata_to_new_during_authorize(self):
+ """
+ CephVolumeClient stores subvolume data in the auth metadata file under the
+ 'volumes' key, as there was no subvolume namespace. That key does not make
+ sense with mgr/volumes. This test validates the transparent update of the
+ 'volumes' key to the 'subvolumes' key in the auth metadata file during authorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that the auth metadata file for auth ID 'guest1' is
+ # created on authorizing 'guest1' access to the subvolume1.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Replace 'subvolumes' with 'volumes' to emulate an old-style auth-metadata file
+ guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False)
+
+ # Authorize 'guestclient_1' to access the subvolume2. This should transparently update 'volumes' to 'subvolumes'
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume1): {
+ "dirty": False,
+ "access_level": "rw"
+ },
+ "{0}/{1}".format(group,subvolume2): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_update_old_style_auth_metadata_to_new_during_deauthorize(self):
+ """
+ CephVolumeClient stores subvolume data in the auth metadata file under the
+ 'volumes' key, as there was no subvolume namespace. That key does not make
+ sense with mgr/volumes. This test validates the transparent update of the
+ 'volumes' key to the 'subvolumes' key in the auth metadata file during deauthorize.
+ """
+
+ guest_mount = self.mount_b
+
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ auth_id = "guest1"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group)
+
+ # Authorize 'guestclient_1' to access the subvolume1.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Authorize 'guestclient_1' to access the subvolume2.
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ # Check that the auth metadata file for auth ID 'guest1' is created.
+ auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"])
+ self.assertIn(auth_metadata_filename, guest_mount.ls("volumes"))
+
+ # Replace 'subvolumes' with 'volumes' to emulate an old-style auth-metadata file
+ guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False)
+
+ # Deauthorize 'guestclient_1' from subvolume2. This should transparently update 'volumes' to 'subvolumes'
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group)
+
+ expected_auth_metadata = {
+ "version": 5,
+ "compat_version": 6,
+ "dirty": False,
+ "tenant_id": "tenant1",
+ "subvolumes": {
+ "{0}/{1}".format(group,subvolume1): {
+ "dirty": False,
+ "access_level": "rw"
+ }
+ }
+ }
+
+ auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename)))
+
+ self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"])
+ del expected_auth_metadata["version"]
+ del auth_metadata["version"]
+ self.assertEqual(expected_auth_metadata, auth_metadata)
+
+ # clean up
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
+ guest_mount.umount_wait()
+ self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_evict_client(self):
+ """
+ That a subvolume client can be evicted based on the auth ID
+ """
+
+ subvolumes = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # mounts[0] and mounts[1] will be used as guest clients to mount the subvolumes.
+ for i in range(0, 2):
+ self.mounts[i].umount_wait()
+ guest_mounts = (self.mounts[0], self.mounts[1])
+ auth_id = "guest"
+ guestclient_1 = {
+ "auth_id": auth_id,
+ "tenant_id": "tenant1",
+ }
+
+ # Create two subvolumes. Authorize 'guest' auth ID to mount the two
+ # subvolumes. Mount the two subvolumes. Write data to the volumes.
+ for i in range(2):
+ # Create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolumes[i], "--group_name", group, "--mode=777")
+
+ # authorize guest authID read-write access to subvolume
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolumes[i], guestclient_1["auth_id"],
+ "--group_name", group, "--tenant_id", guestclient_1["tenant_id"])
+
+ mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i],
+ "--group_name", group).rstrip()
+ # configure credentials for guest client
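+ # (the 'key' captured from the authorize command above is used here as the
+ # cephx secret for the guest mount; the authorize command is assumed to print
+ # the key of the authorized auth ID)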
+ self._configure_guest_auth(guest_mounts[i], auth_id, key)
+
+ # mount the subvolume, and write to it
+ guest_mounts[i].mount_wait(cephfs_mntpt=mount_path)
+ guest_mounts[i].write_n_mb("data.bin", 1)
+
+ # Evict guest_mounts[0], which uses auth ID 'guest' and has mounted one of
+ # the subvolumes.
+ self._fs_cmd("subvolume", "evict", self.volname, subvolumes[0], auth_id, "--group_name", group)
+
+ # The evicted guest client, guest_mounts[0], should not be able to do
+ # any more metadata ops. It should start failing all operations
+ # when it sees that its own address is in the blocklist.
+ try:
+ guest_mounts[0].write_n_mb("rogue.bin", 1)
+ except CommandFailedError:
+ pass
+ else:
+ raise RuntimeError("post-eviction write should have failed!")
+
+ # The blocklisted guest client should still unmount cleanly
+ guest_mounts[0].umount_wait()
+
+ # guest_mounts[1], which uses the same auth ID 'guest' but has mounted the
+ # other subvolume, should be able to use its subvolume unaffected.
+ guest_mounts[1].write_n_mb("data.bin.1", 1)
+
+ # Cleanup.
+ guest_mounts[1].umount_wait()
+ for i in range(2):
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolumes[i], auth_id, "--group_name", group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolumes[i], "--group_name", group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_pin_random(self):
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_daemons()
+ self.config_set('mds', 'mds_export_ephemeral_random', True)
+
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01")
+ # no verification
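+ # (Illustrative note: the 'random' policy is assumed to set an ephemeral random
+ # export pin with probability 0.01, so with two active MDS ranks and
+ # mds_export_ephemeral_random enabled above, descendant directories may be
+ # spread across ranks; the effect is probabilistic, hence no verification here.)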
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_fail_invalid_size(self):
+ """
+ That a subvolume cannot be resized to an invalid size and that the quota does not change
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # try to resize the subvolume with an invalid size -10
+ nsize = -10
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_fail_zero_size(self):
+ """
+ That a subvolume cannot be resized to a zero size and that the quota does not change
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # try to resize the subvolume with size 0
+ nsize = 0
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_quota_lt_used_size(self):
+ """
+ That a subvolume can be resized to a size smaller than the current used size
+ and the resulting quota matches the expected size.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes"))
+ susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip())
+ if isinstance(self.mount_a, FuseMount):
+ # the kernel client does not report dir size == rbytes, so only verify on FUSE mounts
+ self.assertEqual(usedsize, susedsize)
+
+ # shrink the subvolume
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume resize' command to succeed")
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self):
+ """
+ That a subvolume cannot be resized to a size smaller than the current used size
+ when --no_shrink is given, and that the quota does not change.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of 10MB
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes"))
+ susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip())
+ if isinstance(self.mount_a, FuseMount):
+ # the kernel client does not report dir size == rbytes, so only verify on FUSE mounts
+ self.assertEqual(usedsize, susedsize)
+
+ # shrink the subvolume
+ nsize = usedsize // 2
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size")
+ else:
+ self.fail("expected the 'fs subvolume resize' command to fail")
+
+ # verify the quota did not change
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, osize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_expand_on_full_subvolume(self):
+ """
+ That a full subvolume can be expanded and that subsequent writes succeed.
+ """
+
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*10
+ # create subvolume of quota 10MB and make sure it exists
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # create one file of size 10MB and write
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+3)
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+
+ # create a 5MB file and try to write more
+ file_size=file_size // 2
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+4)
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ # Not able to write. So expand the subvolume more and try writing the 5MB file again
+ nsize = osize*2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB"
+ "to succeed".format(subvolname, number_of_files, file_size))
+ else:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB"
+ "to fail".format(subvolname, number_of_files, file_size))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_infinite_size(self):
+ """
+ That a subvolume can be resized to an infinite size by unsetting its quota.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size",
+ str(self.DEFAULT_FILE_SIZE*1024*1024))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # resize inf
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf")
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_resize_infinite_size_future_writes(self):
+ """
+ That a subvolume can be resized to an infinite size and that future writes succeed.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size",
+ str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777")
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # resize inf
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf")
+
+ # verify that the quota is None
+ size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")
+ self.assertEqual(size, None)
+
+ # create one file of 10MB and try to write
+ file_size=self.DEFAULT_FILE_SIZE*10
+ number_of_files=1
+ log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname,
+ number_of_files,
+ file_size))
+ filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+5)
+
+ try:
+ self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size)
+ except CommandFailedError:
+ self.fail("expected filling subvolume {0} with {1} file of size {2}MB "
+ "to succeed".format(subvolname, number_of_files, file_size))
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_rm_force(self):
+ # test removing non-existing subvolume with --force
+ subvolume = self._generate_random_subvolume_name()
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume rm --force' command to succeed")
+
+ def test_subvolume_exists_with_subvolumegroup_and_subvolume(self):
+ """Test the presence of any subvolume by specifying the name of subvolumegroup"""
+
+ group = self._generate_random_group_name()
+ subvolume1 = self._generate_random_subvolume_name()
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group)
+ ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group)
+ self.assertEqual(ret.strip('\n'), "subvolume exists")
+ # delete subvolume in group
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
+ ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group)
+ self.assertEqual(ret.strip('\n'), "no subvolume exists")
+ # delete subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_exists_with_subvolumegroup_and_no_subvolume(self):
+ """Test the presence of any subvolume specifying the name
+ of subvolumegroup and no subvolumes"""
+
+ group = self._generate_random_group_name()
+ # create subvolumegroup
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group)
+ self.assertEqual(ret.strip('\n'), "no subvolume exists")
+ # delete subvolumegroup
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_exists_without_subvolumegroup_and_with_subvolume(self):
+ """Test the presence of any subvolume without specifying the name
+ of subvolumegroup"""
+
+ subvolume1 = self._generate_random_subvolume_name()
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1)
+ ret = self._fs_cmd("subvolume", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "subvolume exists")
+ # delete subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ ret = self._fs_cmd("subvolume", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolume exists")
+
+ def test_subvolume_exists_without_subvolumegroup_and_without_subvolume(self):
+ """Test the presence of any subvolume without any subvolumegroup
+ and without any subvolume"""
+
+ ret = self._fs_cmd("subvolume", "exist", self.volname)
+ self.assertEqual(ret.strip('\n'), "no subvolume exists")
+
+ def test_subvolume_shrink(self):
+ """
+ That a subvolume can be shrunk in size and its quota matches the expected size.
+ """
+
+ # create subvolume
+ subvolname = self._generate_random_subvolume_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))
+
+ # make sure it exists
+ subvolpath = self._get_subvolume_path(self.volname, subvolname)
+ self.assertNotEqual(subvolpath, None)
+
+ # shrink the subvolume
+ nsize = osize // 2
+ self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize))
+
+ # verify the quota
+ size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes"))
+ self.assertEqual(size, nsize)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_rm_idempotency(self):
+ """
+ Ensure that deleting a subvolume which has already been deleted with the retain-snapshots
+ option passes. After a deletion with retained snapshots, the subvolume exists until the
+ trash directory (which resides inside the subvolume) is cleaned up. A subvolume deletion
+ issued while the trash directory is not empty should pass and should not error out with EAGAIN.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=256)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # remove snapshots (removes retained volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume (check idempotency)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ self.fail(f"expected subvolume rm to pass with error: {os.strerror(ce.exitstatus)}")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+
+ def test_subvolume_user_metadata_set(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata set' command to succeed")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_set_idempotence(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata set' command to succeed")
+
+ # set same metadata again for subvolume.
+ try:
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata set' command to succeed because it is idempotent operation")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_get(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # get value for specified key.
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(value, ret)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_get_for_nonexisting_key(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # try to get value for nonexisting key
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_nonexist", "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because 'key_nonexist' does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_get_for_nonexisting_section(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # try to get value for nonexisting key (as section does not exist)
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key", "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because section does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_update(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # update metadata against key.
+ new_value = "new_value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, new_value, "--group_name", group)
+
+ # get metadata for specified key of subvolume.
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(new_value, ret)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_list(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)}
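+ # e.g. {'key_0': 'value_0', 'key_1': 'value_1', 'key_2': 'value_2'}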
+
+ for k, v in input_metadata_dict.items():
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group)
+
+ # list metadata
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata ls' command to succeed")
+
+ ret_dict = json.loads(ret)
+
+ # compare output with expected output
+ self.assertDictEqual(input_metadata_dict, ret_dict)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_list_if_no_metadata_set(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # list metadata
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata ls' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # compare output with expected output
+ # expecting empty json/dictionary
+ self.assertEqual(ret, "{}")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_remove(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # remove metadata against specified key.
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_remove_for_nonexisting_key(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # try to remove value for nonexisting key
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_nonexist", "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because 'key_nonexist' does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_remove_for_nonexisting_section(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # try to remove value for nonexisting key (as section does not exist)
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key", "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because section does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_remove_force(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # remove metadata against specified key with --force option.
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+
+ # remove metadata against specified key.
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ # again remove metadata against already removed key with --force option.
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata rm' (with --force) command to succeed")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume in a custom group
+ createpath = os.path.join(".", "volumes", group, subvolname)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # set metadata for subvolume.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata set' command to succeed")
+
+ # get value for specified key.
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(value, ret)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self):
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume in a custom group
+ createpath = os.path.join(".", "volumes", group, subvolname)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # set metadata for subvolume.
+ input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)}
+
+ for k, v in input_metadata_dict.items():
+ self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group)
+
+ # list metadata
+ try:
+ ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata ls' command to succeed")
+
+ ret_dict = json.loads(ret)
+
+ # compare output with expected output
+ self.assertDictEqual(input_metadata_dict, ret_dict)
+
+ # remove metadata against specified key.
+ try:
+ self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_1", "--group_name", group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_1", "--group_name", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key_1 does not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+class TestSubvolumeGroupSnapshots(TestVolumesHelper):
+ """Tests for FS subvolume group snapshot operations."""
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_nonexistent_subvolume_group_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolumegroup snapshot rm' command to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_create_and_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_idempotence(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot group
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # try creating a snapshot w/ the same snapshot name -- should be idempotent
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_ls(self):
+ # tests the 'fs subvolumegroup snapshot ls' command
+
+ snapshots = []
+
+ # create group
+ group = self._generate_random_group_name()
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumegroup snapshots
+ snapshots = self._generate_random_snapshot_name(3)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+
+ subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group))
+ if len(subvolgrpsnapshotls) == 0:
+ raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots")
+ else:
+ snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls]
+ if collections.Counter(snapshotnames) != collections.Counter(snapshots):
+ raise RuntimeError("Error creating or listing subvolume group snapshots")
+
+ @unittest.skip("skipping subvolumegroup snapshot tests")
+ def test_subvolume_group_snapshot_rm_force(self):
+ # test removing non-existing subvolume group snapshot with --force
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed")
+
+ def test_subvolume_group_snapshot_unsupported_status(self):
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # snapshot group
+ try:
+ self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create")
+ else:
+ self.fail("expected subvolumegroup snapshot create command to fail")
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+
+class TestSubvolumeSnapshots(TestVolumesHelper):
+ """Tests for FS subvolume snapshot operations."""
+ def test_nonexistent_subvolume_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove snapshot again
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOENT:
+ raise
+ else:
+ raise RuntimeError("expected the 'fs subvolume snapshot rm' command to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_create_and_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_create_idempotence(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # try creating w/ same subvolume snapshot name -- should be idempotent
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info(self):
+
+ """
+ tests the 'fs subvolume snapshot info' command
+ """
+
+ snap_md = ["created_at", "data_pool", "has_pending_clones"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot, snap_missing = self._generate_random_snapshot_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=1)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snapshot info for non-existent snapshot
+ try:
+ self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot")
+ else:
+ self.fail("expected snapshot info of non-existent snapshot to fail")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_in_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # snapshot subvolume in group
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_snapshot_ls(self):
+ # tests the 'fs subvolume snapshot ls' command
+
+ snapshots = []
+
+ # create subvolume
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # create subvolume snapshots
+ snapshots = self._generate_random_snapshot_name(3)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ if len(subvolsnapshotls) == 0:
+ self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots")
+ else:
+ snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls]
+ if collections.Counter(snapshotnames) != collections.Counter(snapshots):
+ self.fail("Error creating or listing subvolume snapshots")
+
+ # remove snapshot
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_inherited_snapshot_ls(self):
+ # tests that the 'fs subvolume snapshot ls' command does not list
+ # inherited snapshots created as part of a snapshot at the
+ # ancestral level
+
+ snapshots = []
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snap_count = 3
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # create subvolume snapshots
+ snapshots = self._generate_random_snapshot_name(snap_count)
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # Create snapshot at ancestral level
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_1")
+ ancestral_snappath2 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_2")
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1, ancestral_snappath2], omit_sudo=False)
+
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume, group))
+ self.assertEqual(len(subvolsnapshotls), snap_count)
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1, ancestral_snappath2], omit_sudo=False)
+
+ # remove snapshot
+ for snapshot in snapshots:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_inherited_snapshot_info(self):
+ """
+ tests that the 'fs subvolume snapshot info' command fails for
+ inherited snapshots created as part of a snapshot at the
+ ancestral level
+ """
+
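+ # Note (based on how the check below is constructed): an inherited snapshot
+ # appears inside the subvolume's .snap directory under the name
+ # "_<ancestral snapshot name>_<inode number of the group directory>".
+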
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create snapshot at ancestral level
+ ancestral_snap_name = "ancestral_snap_1"
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False)
+
+ # Validate existence of inherited snapshot
+ group_path = os.path.join(".", "volumes", group)
+ inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip())
+ inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir)
+ inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap)
+ self.mount_a.run_shell(['ls', inherited_snappath])
+
+ # snapshot info on inherited snapshot
+ try:
+ self._get_subvolume_snapshot_info(self.volname, subvolume, inherited_snap, group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on snapshot info of inherited snapshot")
+ else:
+ self.fail("expected snapshot info of inherited snapshot to fail")
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_inherited_snapshot_rm(self):
+ """
+ tests that the 'fs subvolume snapshot rm' command fails for
+ inherited snapshots created as part of a snapshot at the
+ ancestral level
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create snapshot at ancestral level
+ ancestral_snap_name = "ancestral_snap_1"
+ ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False)
+
+ # Validate existence of inherited snap
+ group_path = os.path.join(".", "volumes", group)
+ inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip())
+ inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir)
+ inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap)
+ self.mount_a.run_shell(['ls', inherited_snappath])
+
+ # inherited snapshot should not be deletable
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, inherited_snap, "--group_name", group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when removing inherited snapshot")
+ else:
+ self.fail("expected removing inherited snapshot to fail")
+
+ # remove ancestral snapshots
+ self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_subvolumegroup_snapshot_name_conflict(self):
+ """
+ tests that creation of a subvolume snapshot with the same name
+ as its subvolumegroup snapshot fails.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ group_snapshot = self._generate_random_snapshot_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
+
+ # Create subvolumegroup snapshot
+ group_snapshot_path = os.path.join(".", "volumes", group, ".snap", group_snapshot)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', group_snapshot_path], omit_sudo=False)
+
+ # Validate existence of subvolumegroup snapshot
+ self.mount_a.run_shell(['ls', group_snapshot_path])
+
+ # Creation of subvolume snapshot with it's subvolumegroup snapshot name should fail
+ try:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, group_snapshot, "--group_name", group)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when creating subvolume snapshot with same name as subvolume group snapshot")
+ else:
+ self.fail("expected subvolume snapshot creation with same name as subvolumegroup snapshot to fail")
+
+ # remove subvolumegroup snapshot
+ self.mount_a.run_shell(['sudo', 'rmdir', group_snapshot_path], omit_sudo=False)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_retain_snapshot_invalid_recreate(self):
+ """
+ ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash
+ """
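+ # Background, as exercised across the retain-snapshot tests here: removing a
+ # subvolume with --retain-snapshots keeps its snapshots and moves it to the
+ # 'snapshot-retained' state; removing the last retained snapshot then removes
+ # the subvolume itself.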
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate subvolume with an invalid pool
+ data_pool = "invalid_pool"
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname")
+ else:
+ self.fail("expected recreate of subvolume with invalid poolname to fail")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"]))
+
+ # getpath
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots")
+ else:
+ self.fail("expected getpath of subvolume with retained snapshots to fail")
+
+ # remove snapshot (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_recreate_subvolume(self):
+ """
+ ensure a retained subvolume can be recreated and further snapshotted
+ """
+ snap_md = ["created_at", "data_pool", "has_pending_clones"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"]))
+
+ # recreate retained subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "complete",
+ msg="expected state to be 'complete', found '{0}'".format(subvol_info["state"]))
+
+ # snapshot info (older snapshot)
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snap-create (new snapshot)
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2)
+
+ # remove with retain snapshots
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # list snapshots
+ subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the"
+ " created subvolume snapshots")
+ snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls]
+ for snap in [snapshot1, snapshot2]:
+ self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap))
+
+ # remove snapshots (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_with_snapshots(self):
+ """
+ ensure a 'retain snapshots' based delete of a subvolume with snapshots retains the subvolume;
+ also tests allowed and disallowed operations on a retained subvolume
+ """
+ snap_md = ["created_at", "data_pool", "has_pending_clones"]
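+ # As exercised below: in the snapshot-retained state, 'ls' and 'snapshot info'
+ # succeed, 'rm' (with or without --force) is accepted but fails with ENOTEMPTY,
+ # and 'getpath', 'resize' and 'snapshot create' fail with ENOENT.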
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove subvolume -- should fail with ENOTEMPTY since it has snapshots
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fetch info
+ subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume))
+ self.assertEqual(subvol_info["state"], "snapshot-retained",
+ msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"]))
+
+ ## test allowed ops in retained state
+ # ls
+ subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes)))
+ self.assertEqual(subvolumes[0]['name'], subvolume,
+ "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name']))
+
+ # snapshot info
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # rm --force (allowed but should fail)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ # rm (allowed but should fail)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots")
+ else:
+ self.fail("expected rm of subvolume with retained snapshots to fail")
+
+ ## test disallowed ops
+ # getpath
+ try:
+ self._fs_cmd("subvolume", "getpath", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots")
+ else:
+ self.fail("expected getpath of subvolume with retained snapshots to fail")
+
+ # resize
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024
+ try:
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots")
+ else:
+ self.fail("expected resize of subvolume with retained snapshots to fail")
+
+ # snap-create
+ try:
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots")
+ else:
+ self.fail("expected snapshot create of subvolume with retained snapshots to fail")
+
+ # remove snapshot (should remove volume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_without_snapshots(self):
+ """
+ ensure a 'retain snapshots' based delete of a subvolume with no snapshots deletes the subvolume
+ """
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove with snapshot retention (should remove volume, no snapshots to retain)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_trash_busy_recreate(self):
+ """
+ ensure retained subvolume recreate fails if its trash is not yet purged
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # fake a trash entry
+ self._update_fake_trash(subvolume)
+
+ # recreate subvolume
+ try:
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending")
+ else:
+ self.fail("expected recreate of subvolume with purge pending to fail")
+
+ # clear fake trash entry
+ self._update_fake_trash(subvolume, create=False)
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_rm_with_snapshots(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove subvolume -- should fail with ENOTEMPTY since it has snapshots
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOTEMPTY:
+ raise RuntimeError("invalid error code returned when deleting subvolume with snapshots")
+ else:
+ raise RuntimeError("expected subvolume deletion to fail")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_protect_unprotect_sanity(self):
+ """
+ Snapshot protect/unprotect commands are deprecated. This test exists to ensure that
+ invoking the commands does not cause errors, until they are removed in a subsequent release.
+ """
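+ # The deprecated commands exercised below take the same arguments as other
+ # snapshot commands, i.e. (illustrative):
+ # ceph fs subvolume snapshot protect <vol> <subvol> <snap>
+ # ceph fs subvolume snapshot unprotect <vol> <subvol> <snap>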
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # now, protect snapshot
+ self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # now, unprotect snapshot
+ self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_rm_force(self):
+ # test removing a non-existent subvolume snapshot with --force
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # remove snapshot
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, "--force")
+ except CommandFailedError:
+ raise RuntimeError("expected the 'fs subvolume snapshot rm --force' command to succeed")
+
+ def test_subvolume_snapshot_metadata_set(self):
+ """
+ Set custom metadata for subvolume snapshot.
+ """
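+ # The mgr command exercised via _fs_cmd below corresponds to the CLI form
+ # (illustrative):
+ # ceph fs subvolume snapshot metadata set <vol> <subvol> <snap> <key> <value> [--group_name <group>]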
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_set_idempotence(self):
+ """
+ Set custom metadata for subvolume snapshot (Idempotency).
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed")
+
+ # set same metadata again for subvolume.
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed because it is an idempotent operation")
+
+ # get value for specified key.
+ try:
+ ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(value, ret)
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_get(self):
+ """
+ Get custom metadata for a specified key in subvolume snapshot metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # get value for specified key.
+ try:
+ ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(value, ret)
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_get_for_nonexisting_key(self):
+ """
+ Get custom metadata for a subvolume snapshot when the specified key does not exist in the metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # try to get value for nonexisting key
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key_nonexist", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because 'key_nonexist' does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_get_for_nonexisting_section(self):
+ """
+ Get custom metadata for a subvolume snapshot when no metadata has been added for the snapshot.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # try to get value for nonexisting key (as section does not exist)
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because section does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_update(self):
+ """
+ Update custom metadata for a specified key in subvolume snapshot metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # update metadata against key.
+ new_value = "new_value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, new_value, group)
+
+ # get metadata for specified key of snapshot.
+ try:
+ ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed")
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(new_value, ret)
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_list(self):
+ """
+ List custom metadata for subvolume snapshot.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)}
+
+ for k, v in input_metadata_dict.items():
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, k, v, group)
+
+ # list metadata
+ try:
+ ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group))
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed")
+
+ # compare output with expected output
+ self.assertDictEqual(input_metadata_dict, ret_dict)
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_list_if_no_metadata_set(self):
+ """
+ List custom metadata for a subvolume snapshot when no metadata has been added for the snapshot.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # list metadata
+ try:
+ ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group))
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed")
+
+ # compare output with expected output
+ empty_dict = {}
+ self.assertDictEqual(ret_dict, empty_dict)
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_remove(self):
+ """
+ Remove custom metadata for a specified key in subvolume snapshot metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # remove metadata against specified key.
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, key, snapshot, group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_remove_for_nonexisting_key(self):
+ """
+ Remove custom metadata for a subvolume snapshot when the specified key does not exist in the metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # try to remove value for nonexisting key
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key_nonexist", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because 'key_nonexist' does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_remove_for_nonexisting_section(self):
+ """
+ Remove custom metadata for a subvolume snapshot when no metadata has been added for the snapshot.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # try to remove value for nonexisting key (as section does not exist)
+ # Expecting ENOENT exit status because key does not exist
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key", group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because section does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_remove_force(self):
+ """
+ Forcefully remove custom metadata for a specified key in subvolume snapshot metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # remove metadata against specified key with --force option.
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_remove_force_for_nonexisting_key(self):
+ """
+ Forcefully remove custom metadata for a subvolume snapshot when the specified key does not exist in the metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # remove metadata against specified key.
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed")
+
+ # confirm key is removed by again fetching metadata
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ self.fail("Expected ENOENT because key does not exist")
+
+ # again remove metadata against already removed key with --force option.
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force")
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata rm' (with --force) command to succeed")
+
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_metadata_after_snapshot_remove(self):
+ """
+ Verify that subvolume snapshot metadata is removed after snapshot removal.
+ """
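+ # As implied by the grep further below, the subvolume's .meta file is an
+ # INI-style config and user snapshot metadata lives under a section named
+ # "SNAP_METADATA_<snapshot>" with one "key = value" entry per key
+ # (section layout here is illustrative).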
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+
+ # get value for specified key.
+ ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group)
+
+ # remove '\n' from returned value.
+ ret = ret.strip('\n')
+
+ # match received value with expected value.
+ self.assertEqual(value, ret)
+
+ # remove subvolume snapshot.
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+
+ # try to get metadata after removing snapshot.
+ # Expecting ENOENT with an error message saying the snapshot does not exist
+ cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd(
+ args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group],
+ check_status=False, stdout=StringIO(), stderr=StringIO())
+ self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error")
+ self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(),
+ f"Expecting message: snapshot '{snapshot}' does not exist ")
+
+ # confirm metadata is removed by searching section name in .meta file
+ meta_path = os.path.join(".", "volumes", group, subvolname, ".meta")
+ section_name = "SNAP_METADATA_" + snapshot
+
+ try:
+ self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+ except CommandFailedError as e:
+ self.assertNotEqual(e.exitstatus, 0)
+ else:
+ self.fail("Expected non-zero exit status because the section should not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+
+ def test_clean_stale_subvolume_snapshot_metadata(self):
+ """
+ Validate cleaning of stale subvolume snapshot metadata.
+ """
+ subvolname = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create group.
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume in group.
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+ # set metadata for snapshot.
+ key = "key"
+ value = "value"
+ try:
+ self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+ except CommandFailedError:
+ self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed")
+
+ # save the subvolume config file.
+ meta_path = os.path.join(".", "volumes", group, subvolname, ".meta")
+ tmp_meta_path = os.path.join(".", "volumes", group, subvolname, ".meta.stale_snap_section")
+ self.mount_a.run_shell(['sudo', 'cp', '-p', meta_path, tmp_meta_path], omit_sudo=False)
+
+ # Delete snapshot, this would remove user snap metadata
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+
+ # Copy back saved subvolume config file. This would have stale snapshot metadata
+ self.mount_a.run_shell(['sudo', 'cp', '-p', tmp_meta_path, meta_path], omit_sudo=False)
+
+ # Verify that it has stale snapshot metadata
+ section_name = "SNAP_METADATA_" + snapshot
+ try:
+ self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+ except CommandFailedError:
+ self.fail("Expected grep cmd to succeed because stale snapshot metadata exist")
+
+ # Do any subvolume operation to clean the stale snapshot metadata
+ _ = json.loads(self._get_subvolume_info(self.volname, subvolname, group))
+
+ # Verify that the stale snapshot metadata is cleaned
+ try:
+ self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+ except CommandFailedError as e:
+ self.assertNotEqual(e.exitstatus, 0)
+ else:
+ self.fail("Expected non-zero exit status because stale snapshot metadata should not exist")
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean.
+ self._wait_for_trash_empty()
+ # Clean tmp config file
+ self.mount_a.run_shell(['sudo', 'rm', '-f', tmp_meta_path], omit_sudo=False)
+
+
+class TestSubvolumeSnapshotClones(TestVolumesHelper):
+ """ Tests for FS subvolume snapshot clone operations."""
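+ # Typical flow exercised by these tests: create a snapshot, schedule a clone
+ # with 'fs subvolume snapshot clone', poll 'fs clone status' until the clone
+ # completes (see _wait_for_clone_to_complete), then verify and clean up.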
+ def test_clone_subvolume_info(self):
+ # tests the 'fs subvolume info' command for a clone
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=1)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, clone))
+ if len(subvol_info) == 0:
+ raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume")
+ for md in subvol_md:
+ if md not in subvol_info.keys():
+ raise RuntimeError("%s not present in the metadata of subvolume" % md)
+ if subvol_info["type"] != "clone":
+ raise RuntimeError("type should be set to clone")
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info_without_snapshot_clone(self):
+ """
+ Verify subvolume snapshot info output when the snapshot has not been cloned.
+ If no clone is performed, the path /volumes/_index/clone/{track_id}
+ will not exist.
+ """
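+ # Background (also exercised in the orphan-clone test further below): each
+ # in-flight clone is tracked by an index entry under /volumes/_index/clone/,
+ # and 'snapshot info' surfaces that state via 'has_pending_clones',
+ # 'pending_clones' and 'orphan_clones_count'.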
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # list snapshot info
+ result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
+
+ # verify snapshot info
+ self.assertEqual(result['has_pending_clones'], "no")
+ self.assertFalse('orphan_clones_count' in result)
+ self.assertFalse('pending_clones' in result)
+
+ # remove snapshot, subvolume, clone
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info_if_no_clone_pending(self):
+ """
+ Verify subvolume snapshot info output if no clone is in pending state.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone_list = [f'clone_{i}' for i in range(3)]
+
+ # create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule clones
+ for clone in clone_list:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clones status
+ for clone in clone_list:
+ self._wait_for_clone_to_complete(clone)
+
+ # list snapshot info
+ result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
+
+ # verify snapshot info
+ self.assertEqual(result['has_pending_clones'], "no")
+ self.assertFalse('orphan_clones_count' in result)
+ self.assertFalse('pending_clones' in result)
+
+ # remove snapshot, subvolume, clone
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ for clone in clone_list:
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info_if_clone_pending_for_no_group(self):
+ """
+ Verify subvolume snapshot info output if clones are in pending state.
+ Clones are not scheduled for a particular target_group; hence target_group
+ should not appear in the output, as we don't show _nogroup (the default group).
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone_list = [f'clone_{i}' for i in range(3)]
+
+ # create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule clones
+ for clone in clone_list:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # list snapshot info
+ result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
+
+ # verify snapshot info
+ expected_clone_list = []
+ for clone in clone_list:
+ expected_clone_list.append({"name": clone})
+ self.assertEqual(result['has_pending_clones'], "yes")
+ self.assertFalse('orphan_clones_count' in result)
+ self.assertListEqual(result['pending_clones'], expected_clone_list)
+ self.assertEqual(len(result['pending_clones']), 3)
+
+ # check clones status
+ for clone in clone_list:
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot, subvolume, clone
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ for clone in clone_list:
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info_if_clone_pending_for_target_group(self):
+ """
+ Verify subvolume snapshot info output if clones are in pending state.
+ Clones are scheduled with a target_group, which should appear in the pending clones output.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ group = self._generate_random_group_name()
+ target_group = self._generate_random_group_name()
+
+ # create groups
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+ self._fs_cmd("subvolumegroup", "create", self.volname, target_group)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777")
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone,
+ "--group_name", group, "--target_group_name", target_group)
+
+ # list snapshot info
+ result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot, "--group_name", group))
+
+ # verify snapshot info
+ expected_clone_list = [{"name": clone, "target_group": target_group}]
+ self.assertEqual(result['has_pending_clones'], "yes")
+ self.assertFalse('orphan_clones_count' in result)
+ self.assertListEqual(result['pending_clones'], expected_clone_list)
+ self.assertEqual(len(result['pending_clones']), 1)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone, clone_group=target_group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, target_group)
+
+ # remove groups
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, target_group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_info_if_orphan_clone(self):
+ """
+ Verify subvolume snapshot info output if orphan clones exist.
+ Orphan clones should not be listed under pending clones;
+ orphan_clones_count should display the correct count of orphan clones.
+ """
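+ # In this test a clone is made 'orphan' by deleting its track file under
+ # /volumes/_index/clone/ while the clone is still pending; such clones are
+ # then reported via orphan_clones_count instead of the pending_clones list.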
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone_list = [f'clone_{i}' for i in range(3)]
+
+ # create subvolume.
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15)
+
+ # schedule clones
+ for clone in clone_list:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # remove track file for third clone to make it orphan
+ meta_path = os.path.join(".", "volumes", "_nogroup", subvolume, ".meta")
+ pending_clones_result = self.mount_a.run_shell(['sudo', 'grep', 'clone snaps', '-A3', meta_path], omit_sudo=False, stdout=StringIO(), stderr=StringIO())
+ third_clone_track_id = pending_clones_result.stdout.getvalue().splitlines()[3].split(" = ")[0]
+ third_clone_track_path = os.path.join(".", "volumes", "_index", "clone", third_clone_track_id)
+ self.mount_a.run_shell(f"sudo rm -f {third_clone_track_path}", omit_sudo=False)
+
+ # list snapshot info
+ result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
+
+ # verify snapshot info
+ expected_clone_list = []
+ for i in range(len(clone_list)-1):
+ expected_clone_list.append({"name": clone_list[i]})
+ self.assertEqual(result['has_pending_clones'], "yes")
+ self.assertEqual(result['orphan_clones_count'], 1)
+ self.assertListEqual(result['pending_clones'], expected_clone_list)
+ self.assertEqual(len(result['pending_clones']), 2)
+
+ # check clones status
+ for i in range(len(clone_list)-1):
+ self._wait_for_clone_to_complete(clone_list[i])
+
+ # list snapshot info after cloning completion
+ res = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot))
+
+ # verify snapshot info (has_pending_clones should be no)
+ self.assertEqual(res['has_pending_clones'], "no")
+
+ def test_non_clone_status(self):
+ subvolume = self._generate_random_subvolume_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ try:
+ self._fs_cmd("clone", "status", self.volname, subvolume)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.ENOTSUP:
+ raise RuntimeError("invalid error code when fetching status of a non cloned subvolume")
+ else:
+ raise RuntimeError("expected fetching of clone status of a subvolume to fail")
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_inherit_snapshot_namespace_and_size(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*12
+
+ # create subvolume, in an isolated namespace with a specified size
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize), "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=8)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # create a pool different from current subvolume pool
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+ default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool")
+ new_pool = "new_pool"
+ self.assertNotEqual(default_pool, new_pool)
+ self.fs.add_data_pool(new_pool)
+
+ # update source subvolume pool
+ self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="")
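+ # note: the source's pool and namespace are changed only after the snapshot was taken,
+ # so the clone below (scheduled with no --pool) is presumably expected to inherit the
+ # snapshot-time layout, namespace isolation and size, which _verify_clone checks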
+
+ # schedule a clone, with NO --pool specification
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_inherit_quota_attrs(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*12
+
+ # create subvolume with a specified size
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777", "--size", str(osize))
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=8)
+
+ # get subvolume path
+ subvolpath = self._get_subvolume_path(self.volname, subvolume)
+
+ # set quota on number of files
+ self.mount_a.setfattr(subvolpath, 'ceph.quota.max_files', "20", sudo=True)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # get subvolume path
+ clonepath = self._get_subvolume_path(self.volname, clone)
+
+ # verify quota max_files is inherited from source snapshot
+ subvol_quota = self.mount_a.getfattr(subvolpath, "ceph.quota.max_files")
+ clone_quota = self.mount_a.getfattr(clonepath, "ceph.quota.max_files")
+ self.assertEqual(subvol_quota, clone_quota)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_getpath(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # clone should not be accessible right now
+ try:
+ self._get_subvolume_path(self.volname, clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EAGAIN:
+ raise RuntimeError("invalid error code when fetching path of an pending clone")
+ else:
+ raise RuntimeError("expected fetching path of an pending clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_snapshot_rm(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # snapshot should not be deletable now
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
+ else:
+ self.fail("expected removing source snapshot of a clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_in_progress_source(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # verify clone source
+ result = json.loads(self._fs_cmd("clone", "status", self.volname, clone))
+ source = result['status']['source']
+ self.assertEqual(source['volume'], self.volname)
+ self.assertEqual(source['subvolume'], subvolume)
+ self.assertEqual(source.get('group', None), None)
+ self.assertEqual(source['snapshot'], snapshot)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # clone should be accessible now
+ subvolpath = self._get_subvolume_path(self.volname, clone)
+ self.assertNotEqual(subvolpath, None)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_clone_retain_snapshot_with_snapshots(self):
+ """
+ retain snapshots of a cloned subvolume and check disallowed operations
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # store path for clone verification
+ subvol1_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path)
+
+ # create a snapshot on the clone
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2)
+
+ # retain a clone
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots")
+
+ # list snapshots
+ clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone))
+ self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the"
+ " created subvolume snapshots")
+ snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls]
+ for snap in [snapshot2]:
+ self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap))
+
+ ## check disallowed operations on retained clone
+ # clone-status
+ try:
+ self._fs_cmd("clone", "status", self.volname, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots")
+ else:
+ self.fail("expected clone status of clone with retained snapshots to fail")
+
+ # clone-cancel
+ try:
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots")
+ else:
+ self.fail("expected clone cancel of clone with retained snapshots to fail")
+
+ # remove snapshots (removes subvolumes as all are in retained state)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot2)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_clone(self):
+ """
+ clone a snapshot from a snapshot retained subvolume
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # store path for clone verification
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path)
+
+ # remove snapshot (removes the retained subvolume)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self):
+ """
+ clone a subvolume from recreated subvolume's latest snapshot
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+ clone = self._generate_random_clone_name(1)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # get and store path for clone verification
+ subvol2_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot newer subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # clone retained subvolume's newer snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_recreate(self):
+ """
+ recreate a subvolume from one of its retained snapshots
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # store path for clone verification
+ subvol_path = self._get_subvolume_path(self.volname, subvolume)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=16)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # remove with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots")
+
+ # recreate retained subvolume using its own snapshot to clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume)
+
+ # check clone status
+ self._wait_for_clone_to_complete(subvolume)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self):
+ """
+ ensure retained clone recreate fails if its trash is not yet purged
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # clone subvolume snapshot
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # snapshot clone
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot)
+
+ # remove clone with snapshot retention
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots")
+
+ # fake a trash entry
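+ # (_update_fake_trash is assumed to be a test helper that plants/clears a dummy entry
+ # in the clone's trash dir, so the recreate below sees a purge that is still pending)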
+ self._update_fake_trash(clone)
+
+ # clone subvolume snapshot (recreate)
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending")
+ else:
+ self.fail("expected recreate of clone with purge pending to fail")
+
+ # clear fake trash entry
+ self._update_fake_trash(clone, create=False)
+
+ # recreate subvolume
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_attr_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io_mixed(subvolume)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_clone_failure_status_pending_in_progress_complete(self):
+ """
+ ensure failure status is not shown when clone is not in failed/cancelled state
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1 = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=200)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule a clone1
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # pending clone shouldn't show failure status
+ clone1_result = self._get_clone_status(clone1)
+ try:
+ clone1_result["status"]["failure"]["errno"]
+ except KeyError as e:
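+ # str() of a KeyError is the repr of the missing key, hence the quoted 'failure'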
+ self.assertEqual(str(e), "'failure'")
+ else:
+ self.fail("clone status shouldn't show failure for pending clone")
+
+ # check clone1 to be in-progress
+ self._wait_for_clone_to_be_in_progress(clone1)
+
+ # in-progress clone1 shouldn't show failure status
+ clone1_result = self._get_clone_status(clone1)
+ try:
+ clone1_result["status"]["failure"]["errno"]
+ except KeyError as e:
+ self.assertEqual(str(e), "'failure'")
+ else:
+ self.fail("clone status shouldn't show failure for in-progress clone")
+
+ # wait for clone1 to complete
+ self._wait_for_clone_to_complete(clone1)
+
+ # complete clone1 shouldn't show failure status
+ clone1_result = self._get_clone_status(clone1)
+ try:
+ clone1_result["status"]["failure"]["errno"]
+ except KeyError as e:
+ self.assertEqual(str(e), "'failure'")
+ else:
+ self.fail("clone status shouldn't show failure for complete clone")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_clone_failure_status_failed(self):
+ """
+ ensure failure status is shown when clone is in failed state and validate the reason
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1 = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=200)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule a clone1
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # remove snapshot from backend to force the clone failure.
+ snappath = os.path.join(".", "volumes", "_nogroup", subvolume, ".snap", snapshot)
+ self.mount_a.run_shell(['sudo', 'rmdir', snappath], omit_sudo=False)
+
+ # wait for clone1 to fail.
+ self._wait_for_clone_to_fail(clone1)
+
+ # check clone1 status
+ clone1_result = self._get_clone_status(clone1)
+ self.assertEqual(clone1_result["status"]["state"], "failed")
+ self.assertEqual(clone1_result["status"]["failure"]["errno"], "2")
+ self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "snapshot '{0}' does not exist".format(snapshot))
+
+ # clone removal should succeed after failure, remove clone1
+ self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force")
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_clone_failure_status_pending_cancelled(self):
+ """
+ ensure failure status is shown when clone is cancelled during pending state and validate the reason
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1 = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=200)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule a clone1
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # cancel pending clone1
+ self._fs_cmd("clone", "cancel", self.volname, clone1)
+
+ # check clone1 status
+ clone1_result = self._get_clone_status(clone1)
+ self.assertEqual(clone1_result["status"]["state"], "canceled")
+ self.assertEqual(clone1_result["status"]["failure"]["errno"], "4")
+ self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation")
+
+ # clone removal should succeed with force after cancelled, remove clone1
+ self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_clone_failure_status_in_progress_cancelled(self):
+ """
+ ensure failure status is shown when clone is cancelled during in-progress state and validate the reason
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1 = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=200)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5)
+
+ # schedule a clone1
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # wait for clone1 to be in-progress
+ self._wait_for_clone_to_be_in_progress(clone1)
+
+ # cancel in-progress clone1
+ self._fs_cmd("clone", "cancel", self.volname, clone1)
+
+ # check clone1 status
+ clone1_result = self._get_clone_status(clone1)
+ self.assertEqual(clone1_result["status"]["state"], "canceled")
+ self.assertEqual(clone1_result["status"]["failure"]["errno"], "4")
+ self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation")
+
+ # clone removal should succeed with force after cancelled, remove clone1
+ self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_quota_exceeded(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume with 20MB quota
+ osize = self.DEFAULT_FILE_SIZE*1024*1024*20
+ self._fs_cmd("subvolume", "create", self.volname, subvolume,"--mode=777", "--size", str(osize))
+
+ # do IO, write 50 files of 1MB each to exceed quota. This mostly succeeds as quota enforcement takes time.
+ try:
+ self._do_subvolume_io(subvolume, number_of_files=50)
+ except CommandFailedError:
+ # ignore quota enforcement error.
+ pass
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_in_complete_clone_rm(self):
+ """
+ Validates the removal of a clone when it is not in 'complete|cancelled|failed' state.
+ The forceful removal of a subvolume clone succeeds only if it is in one of the
+ 'complete|cancelled|failed' states. It fails with EAGAIN in any other state.
+ """
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # Use --force since the clone is not complete. Returns EAGAIN as the clone is neither complete, cancelled nor failed.
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EAGAIN:
+ raise RuntimeError("invalid error code when trying to remove an in-progress clone")
+ else:
+ raise RuntimeError("expected removal of an in-progress clone to fail")
+
+ # cancel on-going clone
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+
+ # verify canceled state
+ self._check_clone_canceled(clone)
+
+ # clone removal should succeed after cancel
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_retain_suid_guid(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # Create a file with suid, guid bits set along with executable bit.
+ args = ["subvolume", "getpath", self.volname, subvolume]
+ args = tuple(args)
+ subvolpath = self._fs_cmd(*args)
+ self.assertNotEqual(subvolpath, None)
+ subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline
+
+ file_path = os.path.join(subvolpath, "test_suid_file")
+ self.mount_a.run_shell(["touch", file_path])
+ self.mount_a.run_shell(["chmod", "u+sx,g+sx", file_path])
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_and_reclone(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone1)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # now the clone is just like a normal subvolume -- snapshot the clone and fork
+ # another clone. before that, do some IO so it can be differentiated.
+ self._do_subvolume_io(clone1, create_dir="data", number_of_files=32)
+
+ # snapshot clone -- use same snap name
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone2)
+
+ # verify clone
+ self._verify_clone(clone1, snapshot, clone2)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_cancel_in_progress(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=128)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # cancel on-going clone
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+
+ # verify canceled state
+ self._check_clone_canceled(clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_cancel_pending(self):
+ """
+ this test is a bit more involved compared to canceling an in-progress clone.
+ we'd need to ensure that a to-be canceled clone has still not been picked up
+ by cloner threads. exploit the fact that clones are picked up in an FCFS
+ fashion and there are four (4) cloner threads by default. When the number of
+ cloner threads increases, this test _may_ start tripping -- so, the number of
+ clone operations would need to be jacked up.
+ """
+ # default number of clone threads
+ NR_THREADS = 4
+ # good enough for 4 threads
+ NR_CLONES = 5
+ # yes, 1 GiB -- we need the clone to run for some time
+ FILE_SIZE_MB = 1024
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clones = self._generate_random_clone_name(NR_CLONES)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule clones
+ for clone in clones:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ to_wait = clones[0:NR_THREADS]
+ to_cancel = clones[NR_THREADS:]
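+ # with four cloner threads picking up clones FCFS, the first NR_THREADS clones are
+ # assumed to be in progress by now while the remaining one should still be pending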
+
+ # cancel pending clones and verify
+ for clone in to_cancel:
+ status = json.loads(self._fs_cmd("clone", "status", self.volname, clone))
+ self.assertEqual(status["status"]["state"], "pending")
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ self._check_clone_canceled(clone)
+
+ # let's cancel on-going clones. handle the case where some of the clones
+ # _just_ complete
+ for clone in list(to_wait):
+ try:
+ self._fs_cmd("clone", "cancel", self.volname, clone)
+ to_cancel.append(clone)
+ to_wait.remove(clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EINVAL:
+ raise RuntimeError("invalid error code when cancelling on-going clone")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ for clone in to_wait:
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+ for clone in to_cancel:
+ self._fs_cmd("subvolume", "rm", self.volname, clone, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_different_groups(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ s_group, c_group = self._generate_random_group_name(2)
+
+ # create groups
+ self._fs_cmd("subvolumegroup", "create", self.volname, s_group)
+ self._fs_cmd("subvolumegroup", "create", self.volname, c_group)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, s_group, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, subvolume_group=s_group, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone,
+ '--group_name', s_group, '--target_group_name', c_group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone, clone_group=c_group)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, c_group)
+
+ # remove groups
+ self._fs_cmd("subvolumegroup", "rm", self.volname, s_group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, c_group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_fail_with_remove(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+
+ pool_capacity = 32 * 1024 * 1024
+ # number of files required to fill up 99% of the pool
+ nr_files = int((pool_capacity * 0.99) / (TestVolumes.DEFAULT_FILE_SIZE * 1024 * 1024))
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=nr_files)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # add data pool
+ new_pool = "new_pool"
+ self.fs.add_data_pool(new_pool)
+
+ self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool,
+ "max_bytes", "{0}".format(pool_capacity // 4))
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool)
+
+ # check clone status -- this should dramatically overshoot the pool quota
+ self._wait_for_clone_to_complete(clone1)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool)
+
+ # wait a bit so that subsequent I/O will give pool full error
+ time.sleep(120)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2, "--pool_layout", new_pool)
+
+ # check clone status
+ self._wait_for_clone_to_fail(clone2)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EAGAIN:
+ raise RuntimeError("invalid error code when trying to remove failed clone")
+ else:
+ raise RuntimeError("expected error when removing a failed clone")
+
+ # ... and with force, failed clone can be removed
+ self._fs_cmd("subvolume", "rm", self.volname, clone2, "--force")
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_on_existing_subvolumes(self):
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create subvolumes
+ self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777")
+ self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume1, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot)
+
+ # schedule a clone with target as subvolume2
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError("invalid error code when cloning to existing subvolume")
+ else:
+ raise RuntimeError("expected cloning to fail if the target is an existing subvolume")
+
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone)
+
+ # schedule a clone with target as clone
+ try:
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone)
+ except CommandFailedError as ce:
+ if ce.exitstatus != errno.EEXIST:
+ raise RuntimeError("invalid error code when cloning to existing clone")
+ else:
+ raise RuntimeError("expected cloning to fail if the target is an existing clone")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume1, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_pool_layout(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # add data pool
+ new_pool = "new_pool"
+ newid = self.fs.add_data_pool(new_pool)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ subvol_path = self._get_subvolume_path(self.volname, clone)
+ desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool")
+ try:
+ self.assertEqual(desired_pool, new_pool)
+ except AssertionError:
+ self.assertEqual(int(desired_pool), newid) # old kernel returns id
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_under_group(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+ group = self._generate_random_group_name()
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--target_group_name', group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone, clone_group=group)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, clone_group=group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone, group)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_with_attrs(self):
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ mode = "777"
+ uid = "1000"
+ gid = "1000"
+ new_uid = "1001"
+ new_gid = "1001"
+ new_mode = "700"
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # change subvolume attrs (to ensure clone picks up snapshot attrs)
+ self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_clone_with_upgrade(self):
+ """
+ yet another poor man's upgrade test -- rather than going through a full
+ upgrade cycle, emulate old-style subvolumes by going through the wormhole
+ and verify the clone operation.
+ further ensure that a legacy subvolume is not updated to v2, but the clone is.
+ """
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # emulate an old-fashioned subvolume
+ createpath = os.path.join(".", "volumes", "_nogroup", subvolume)
+ self.mount_a.run_shell_payload(f"sudo mkdir -p -m 777 {createpath}", omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
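+ # a legacy subvolume is presumably just a directory under volumes/ carrying a pool
+ # layout xattr, which is what the two steps above emulate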
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=64)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True)
+
+ # Insert delay at the beginning of snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone)
+
+ # snapshot should not be deletable now
+ try:
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone")
+ else:
+ self.fail("expected removing source snapshot of a clone to fail")
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_version=1)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+
+ # ensure metadata file is in v2 location, with required version v2
+ self._assert_meta_location_and_version(self.volname, clone)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_snapshot_reconf_max_concurrent_clones(self):
+ """
+ Validate 'max_concurrent_clones' config option
+ """
+
+ # get the default number of cloner threads
+ default_max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(default_max_concurrent_clones, 4)
+
+ # Increase number of cloner threads
+ self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 6)
+ max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(max_concurrent_clones, 6)
+
+ # Decrease number of cloner threads
+ self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2)
+ max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(max_concurrent_clones, 2)
+
+ def test_subvolume_snapshot_config_snapshot_clone_delay(self):
+ """
+ Validate 'snapshot_clone_delay' config option
+ """
+
+ # get the default delay before starting the clone
+ default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay'))
+ self.assertEqual(default_timeout, 0)
+
+ # Insert delay of 2 seconds at the beginning of the snapshot clone
+ self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2)
+ default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay'))
+ self.assertEqual(default_timeout, 2)
+
+ # Decrease number of cloner threads
+ self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2)
+ max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones'))
+ self.assertEqual(max_concurrent_clones, 2)
+
+ def test_subvolume_under_group_snapshot_clone(self):
+ subvolume = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone = self._generate_random_clone_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777")
+
+ # do some IO
+ self._do_subvolume_io(subvolume, subvolume_group=group, number_of_files=32)
+
+ # snapshot subvolume
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)
+
+ # schedule a clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone, source_group=group)
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group)
+
+ # remove subvolumes
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume, group)
+ self._fs_cmd("subvolume", "rm", self.volname, clone)
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+
+class TestMisc(TestVolumesHelper):
+ """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations."""
+ def test_connection_expiration(self):
+ # unmount any cephfs mounts
+ for i in range(0, self.CLIENTS_REQUIRED):
+ self.mounts[i].umount_wait()
+ sessions = self._session_list()
+ self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted
+
+ # Get the mgr to definitely mount cephfs
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ sessions = self._session_list()
+ self.assertEqual(len(sessions), 1)
+
+ # Now wait for the mgr to expire the connection:
+ self.wait_until_evicted(sessions[0]['id'], timeout=90)
+
+ def test_mgr_eviction(self):
+ # unmount any cephfs mounts
+ for i in range(0, self.CLIENTS_REQUIRED):
+ self.mounts[i].umount_wait()
+ sessions = self._session_list()
+ self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted
+
+ # Get the mgr to definitely mount cephfs
+ subvolume = self._generate_random_subvolume_name()
+ self._fs_cmd("subvolume", "create", self.volname, subvolume)
+ sessions = self._session_list()
+ self.assertEqual(len(sessions), 1)
+
+ # Now fail the mgr, check the session was evicted
+ mgr = self.mgr_cluster.get_active_id()
+ self.mgr_cluster.mgr_fail(mgr)
+ self.wait_until_evicted(sessions[0]['id'])
+
+ def test_names_can_only_be_goodchars(self):
+ """
+ Test that creating volumes, subvolumes and subvolume groups fails when their
+ names use characters beyond [a-zA-Z0-9 -_.].
+ """
+ volname, badname = 'testvol', 'abcd@#'
+
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd('volume', 'create', badname)
+ self._fs_cmd('volume', 'create', volname)
+
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd('subvolumegroup', 'create', volname, badname)
+
+ with self.assertRaises(CommandFailedError):
+ self._fs_cmd('subvolume', 'create', volname, badname)
+ self._fs_cmd('volume', 'rm', volname, '--yes-i-really-mean-it')
+
+ def test_subvolume_ops_on_nonexistent_vol(self):
+ # tests the fs subvolume operations on a non-existent volume
+
+ volname = "non_existent_subvolume"
+
+ # try subvolume operations
+ for op in ("create", "rm", "getpath", "info", "resize", "pin", "ls"):
+ try:
+ if op == "resize":
+ self._fs_cmd("subvolume", "resize", volname, "subvolname_1", "inf")
+ elif op == "pin":
+ self._fs_cmd("subvolume", "pin", volname, "subvolname_1", "export", "1")
+ elif op == "ls":
+ self._fs_cmd("subvolume", "ls", volname)
+ else:
+ self._fs_cmd("subvolume", op, volname, "subvolume_1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT)
+ else:
+ self.fail("expected the 'fs subvolume {0}' command to fail".format(op))
+
+ # try subvolume snapshot operations and clone create
+ for op in ("create", "rm", "info", "protect", "unprotect", "ls", "clone"):
+ try:
+ if op == "ls":
+ self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1")
+ elif op == "clone":
+ self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1", "clone_1")
+ else:
+ self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT)
+ else:
+ self.fail("expected the 'fs subvolume snapshot {0}' command to fail".format(op))
+
+ # try clone status
+ try:
+ self._fs_cmd("clone", "status", volname, "clone_1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT)
+ else:
+ self.fail("expected the 'fs clone status' command to fail")
+
+ # try subvolumegroup operations
+ for op in ("create", "rm", "getpath", "pin", "ls"):
+ try:
+ if op == "pin":
+ self._fs_cmd("subvolumegroup", "pin", volname, "group_1", "export", "0")
+ elif op == "ls":
+ self._fs_cmd("subvolumegroup", op, volname)
+ else:
+ self._fs_cmd("subvolumegroup", op, volname, "group_1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT)
+ else:
+ self.fail("expected the 'fs subvolumegroup {0}' command to fail".format(op))
+
+ # try subvolumegroup snapshot operations
+ for op in ("create", "rm", "ls"):
+ try:
+ if op == "ls":
+ self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1")
+ else:
+ self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1", "snapshot_1")
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.ENOENT)
+ else:
+ self.fail("expected the 'fs subvolumegroup snapshot {0}' command to fail".format(op))
+
+ def test_subvolume_upgrade_legacy_to_v1(self):
+ """
+ poor man's upgrade test -- rather than going through a full upgrade cycle,
+ emulate subvolumes by going through the wormhole and verify if they are
+ accessible.
+ further ensure that a legacy volume is not updated to v2.
+ """
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume -- one in the default group and
+ # the other in a custom group
+ createpath1 = os.path.join(".", "volumes", "_nogroup", subvolume1)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False)
+
+ # create group
+ createpath2 = os.path.join(".", "volumes", group, subvolume2)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath2], omit_sudo=False)
+
+ # this would auto-upgrade on access without anyone noticing
+ subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume1)
+ self.assertNotEqual(subvolpath1, None)
+ subvolpath1 = subvolpath1.rstrip() # strip any trailing newline
+
+ subvolpath2 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume2, group)
+ self.assertNotEqual(subvolpath2, None)
+ subvolpath2 = subvolpath2.rstrip() # strip any trailing newline
+
+ # and... the subvolume path returned should be what we created behind the scenes
+ self.assertEqual(createpath1[1:], subvolpath1)
+ self.assertEqual(createpath2[1:], subvolpath2)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_subvolume_no_upgrade_v1_sanity(self):
+ """
+ poor man's upgrade test -- theme continues...
+
+ This test is to ensure v1 subvolumes are retained as is, due to a snapshot being present, and runs through
+ a series of operations on the v1 subvolume to ensure they work as expected.
+ """
+ subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime",
+ "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace",
+ "type", "uid", "features", "state"]
+ snap_md = ["created_at", "data_pool", "has_pending_clones"]
+
+ subvolume = self._generate_random_subvolume_name()
+ snapshot = self._generate_random_snapshot_name()
+ clone1, clone2 = self._generate_random_clone_name(2)
+ mode = "777"
+ uid = "1000"
+ gid = "1000"
+
+ # emulate a v1 subvolume -- in the default group
+ subvolume_path = self._create_v1_subvolume(subvolume)
+
+ # getpath
+ subvolpath = self._get_subvolume_path(self.volname, subvolume)
+ self.assertEqual(subvolpath, subvolume_path)
+
+ # ls
+ subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes)))
+ self.assertEqual(subvolumes[0]['name'], subvolume,
+ "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name']))
+
+ # info
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+
+ self.assertEqual(subvol_info["state"], "complete",
+ msg="expected state to be 'complete', found '{0}'".format(subvol_info["state"]))
+ self.assertEqual(len(subvol_info["features"]), 2,
+ msg="expected 2 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"]))
+ for feature in ['snapshot-clone', 'snapshot-autoprotect']:
+ self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature))
+
+ # resize
+ nsize = self.DEFAULT_FILE_SIZE*1024*1024*10
+ self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize))
+ subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume))
+ for md in subvol_md:
+ self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md))
+ self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize))
+
+ # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone)
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid)
+
+ # do some IO
+ self._do_subvolume_io(subvolume, number_of_files=8)
+
+ # snap-create
+ self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
+
+ # clone
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone1)
+
+ # ensure clone is v2
+ self._assert_meta_location_and_version(self.volname, clone1, version=2)
+
+ # verify clone
+ self._verify_clone(subvolume, snapshot, clone1, source_version=1)
+
+ # clone (older snapshot)
+ self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2)
+
+ # check clone status
+ self._wait_for_clone_to_complete(clone2)
+
+ # ensure clone is v2
+ self._assert_meta_location_and_version(self.volname, clone2, version=2)
+
+ # verify clone
+ # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747
+ #self._verify_clone(subvolume, 'fake', clone2, source_version=1)
+
+ # snap-info
+ snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot))
+ for md in snap_md:
+ self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md))
+ self.assertEqual(snap_info["has_pending_clones"], "no")
+
+ # snap-ls
+ subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume))
+ self.assertEqual(len(subvol_snapshots), 2, "subvolume snapshot ls count mismatch, expected '2', found {0}".format(len(subvol_snapshots)))
+ snapshotnames = [snapshot['name'] for snapshot in subvol_snapshots]
+ for name in [snapshot, 'fake']:
+ self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name))
+
+ # snap-rm
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot)
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake")
+
+ # ensure volume is still at version 1
+ self._assert_meta_location_and_version(self.volname, subvolume, version=1)
+
+ # rm
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume)
+ self._fs_cmd("subvolume", "rm", self.volname, clone1)
+ self._fs_cmd("subvolume", "rm", self.volname, clone2)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_no_upgrade_v1_to_v2(self):
+ """
+ poor man's upgrade test -- theme continues...
+ ensure v1 to v2 upgrades are not done automatically due to various states of v1
+ """
+ subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3)
+ group = self._generate_random_group_name()
+
+ # emulate a v1 subvolume -- in the default group
+ subvol1_path = self._create_v1_subvolume(subvolume1)
+
+ # emulate a v1 subvolume -- in a custom group
+ subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group)
+
+ # emulate a v1 subvolume -- in a clone pending state
+ self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending')
+
+ # this would attempt auto-upgrade on access, but fail to do so as snapshots exist
+ subvolpath1 = self._get_subvolume_path(self.volname, subvolume1)
+ self.assertEqual(subvolpath1, subvol1_path)
+
+ subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group)
+ self.assertEqual(subvolpath2, subvol2_path)
+
+ # this would attempt auto-upgrade on access, but fail to do so as volume is not complete
+ # use clone status, as only certain operations are allowed in pending state
+ status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3))
+ self.assertEqual(status["status"]["state"], "pending")
+
+ # remove snapshot
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake")
+ self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group)
+
+ # ensure metadata file is in v1 location, with version retained as v1
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=1)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+ try:
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume3)
+ except CommandFailedError as ce:
+ self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on rm of subvolume undergoing clone")
+ else:
+ self.fail("expected rm of subvolume undergoing clone to fail")
+
+ # ensure metadata file is in v1 location, with version retained as v1
+ self._assert_meta_location_and_version(self.volname, subvolume3, version=1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume3, "--force")
+
+ # verify list subvolumes returns an empty list
+ subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname))
+ self.assertEqual(len(subvolumels), 0)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_subvolume_upgrade_v1_to_v2(self):
+ """
+ poor man's upgrade test -- theme continues...
+ ensure v1 to v2 upgrades work
+ """
+ subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
+ group = self._generate_random_group_name()
+
+ # emulate a v1 subvolume -- in the default group
+ subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False)
+
+ # emulate a v1 subvolume -- in a custom group
+ subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False)
+
+ # this would attempt auto-upgrade on access
+ subvolpath1 = self._get_subvolume_path(self.volname, subvolume1)
+ self.assertEqual(subvolpath1, subvol1_path)
+
+ subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group)
+ self.assertEqual(subvolpath2, subvol2_path)
+
+ # ensure metadata file is in v2 location, with version retained as v2
+ self._assert_meta_location_and_version(self.volname, subvolume1, version=2)
+ self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_malicious_metafile_on_legacy_to_v1_upgrade(self):
+ """
+ Validate handcrafted .meta file on legacy subvol root doesn't break the system
+ on legacy subvol upgrade to v1
+ poor man's upgrade test -- theme continues...
+ """
+ subvol1, subvol2 = self._generate_random_subvolume_name(2)
+
+ # emulate an old-fashioned subvolume in the default group
+ createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath1, 'ceph.dir.layout.pool', default_pool, sudo=True)
+
+ # create v2 subvolume
+ self._fs_cmd("subvolume", "create", self.volname, subvol2)
+
+ # Create malicious .meta file in legacy subvolume root. Copy v2 subvolume
+ # .meta into legacy subvol1's root
+ subvol2_metapath = os.path.join(".", "volumes", "_nogroup", subvol2, ".meta")
+ self.mount_a.run_shell(['sudo', 'cp', subvol2_metapath, createpath1], omit_sudo=False)
+
+ # Upgrade legacy subvol1 to v1
+ subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvol1)
+ self.assertNotEqual(subvolpath1, None)
+ subvolpath1 = subvolpath1.rstrip()
+
+ # the subvolume path returned should not be that of subvol2, taken from
+ # the handcrafted .meta file
+ self.assertEqual(createpath1[1:], subvolpath1)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvol1, version=1, legacy=True)
+
+ # Authorize alice authID read-write access to subvol1. Verify it authorizes subvol1 path and not subvol2
+ # path whose '.meta' file is copied to subvol1 root
+ authid1 = "alice"
+ self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1)
+
+ # Validate that the mds path added is of subvol1 and not of subvol2
+ out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty"))
+ self.assertEqual("client.alice", out[0]["entity"])
+ self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"])
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ def test_binary_metafile_on_legacy_to_v1_upgrade(self):
+ """
+ Validate binary .meta file on legacy subvol root doesn't break the system
+ on legacy subvol upgrade to v1
+ poor man's upgrade test -- theme continues...
+ """
+ subvol = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume -- in a custom group
+ createpath = os.path.join(".", "volumes", group, subvol)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
+
+ # Create unparseable binary .meta file on legacy subvol's root
+ meta_contents = os.urandom(4096)
+ meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta")
+ self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True)
+
+ # Upgrade legacy subvol to v1
+ subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group)
+ self.assertNotEqual(subvolpath, None)
+ subvolpath = subvolpath.rstrip()
+
+ # The legacy subvolume path should be returned for subvol.
+ # Should ignore unparseable binary .meta file in subvol's root
+ self.assertEqual(createpath[1:], subvolpath)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvol, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ def test_unparseable_metafile_on_legacy_to_v1_upgrade(self):
+ """
+ Validate unparseable text .meta file on legacy subvol root doesn't break the system
+ on legacy subvol upgrade to v1
+ poor man's upgrade test -- theme continues...
+ """
+ subvol = self._generate_random_subvolume_name()
+ group = self._generate_random_group_name()
+
+ # emulate an old-fashioned subvolume -- in a custom group
+ createpath = os.path.join(".", "volumes", group, subvol)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
+
+ # Create unparseable text .meta file on legacy subvol's root
+ meta_contents = "unparseable config\nfile ...\nunparseable config\nfile ...\n"
+ meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta")
+ self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True)
+
+ # Upgrade legacy subvol to v1
+ subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group)
+ self.assertNotEqual(subvolpath, None)
+ subvolpath = subvolpath.rstrip()
+
+ # The legacy subvolume path should be returned for subvol.
+ # Should ignore the unparseable text .meta file in subvol's root
+ self.assertEqual(createpath[1:], subvolpath)
+
+ # ensure metadata file is in legacy location, with required version v1
+ self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True)
+
+ # remove subvolume
+ self._fs_cmd("subvolume", "rm", self.volname, subvol, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
+
+ # remove group
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+class TestPerModuleFinsherThread(TestVolumesHelper):
+ """
+ Per module finisher thread tests related to mgr/volume cmds.
+ This is used in conjunction with check_counter with min val being 4
+ as four subvolume cmds are run
+ """
+ def test_volumes_module_finisher_thread(self):
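+ # Run a burst of subvolumegroup/subvolume create and rm commands; the
+ # surrounding check_counter task verifies the per-module finisher thread
+ # counters (see the class docstring above).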
+ subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+ group = self._generate_random_group_name()
+
+ # create group
+ self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+ # create subvolumes in group
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group)
+ self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group)
+
+ self._fs_cmd("subvolume", "rm", self.volname, subvol1, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol2, group)
+ self._fs_cmd("subvolume", "rm", self.volname, subvol3, group)
+ self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+ # verify trash dir is clean
+ self._wait_for_trash_empty()
diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py
new file mode 100644
index 000000000..cbb344305
--- /dev/null
+++ b/qa/tasks/cephfs/xfstests_dev.py
@@ -0,0 +1,303 @@
+from io import StringIO
+from logging import getLogger
+from os import getcwd as os_getcwd
+from os.path import join
+from textwrap import dedent
+
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.fuse_mount import FuseMount
+from tasks.cephfs.kernel_mount import KernelMount
+
+
+log = getLogger(__name__)
+
+
+# TODO: add code to run non-ACL tests too.
+# TODO: make xfstests-dev tests run without requiring `make install`.
+class XFSTestsDev(CephFSTestCase):
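+ """
+ Run tests from the xfstests-dev suite against CephFS: clone the repo(s),
+ install dependencies, create the required users, write local.config and
+ ceph.exclude, and build/install the suite.
+ """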
+
+ RESULTS_DIR = "results"
+
+ def setUp(self):
+ super(XFSTestsDev, self).setUp()
+ self.setup_xfsprogs_devs()
+ self.prepare_xfstests_devs()
+
+ def setup_xfsprogs_devs(self):
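+ # xfsprogs is not built by default; subclasses that need it (e.g. the
+ # encryption tests, see get_repos()) can override this hook and set
+ # install_xfsprogs = True.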
+ self.install_xfsprogs = False
+
+ def prepare_xfstests_devs(self):
+ # NOTE: To run a quick test with vstart_runner.py, enable next line
+ # and disable calls to get_repo(), install_deps(), and
+ # build_and_install() and also disable lines in tearDown() for repo
+ # deletion.
+ #self.xfstests_repo_path = '/path/to/xfstests-dev'
+
+ self.get_repos()
+ self.get_test_and_scratch_dirs_ready()
+ self.install_deps()
+ self.create_reqd_users()
+ self.write_local_config()
+ self.write_ceph_exclude()
+ self.build_and_install()
+
+ def tearDown(self):
+ self.del_users_and_groups()
+ self.del_repos()
+ super(XFSTestsDev, self).tearDown()
+
+ def del_users_and_groups(self):
+ self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force',
+ '--remove', 'fsgqa'],
+ omit_sudo=False, check_status=False)
+ self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force',
+ '--remove', '123456-fsgqa'],
+ omit_sudo=False, check_status=False)
+ self.mount_a.client_remote.run(args=['sudo', 'groupdel', 'fsgqa'],
+ omit_sudo=False, check_status=False)
+
+ def del_repos(self):
+ self.save_results_dir()
+ self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfstests_repo_path}',
+ omit_sudo=False, check_status=False)
+
+ if self.install_xfsprogs:
+ self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfsprogs_repo_path}',
+ omit_sudo=False, check_status=False)
+
+ def save_results_dir(self):
+ """
+ When tests in xfstests-dev repo are executed, logs are created and
+ saved, under a directory named "results" that lies at the repo root.
+ In case a test from xfstests-dev repo fails, these logs will help find
+ the cause of the failure.
+
+ Since there's no option in teuthology to copy a directory lying at a
+ custom location in order to save it from teuthology test runner's tear
+ down, let's copy this directory to a standard location that teuthology
+ copies away before erasing all data on the test machine. The standard
+ location chosen in the case here is the Ceph log directory.
+
+ In case of vstart_runner.py, this method does nothing.
+ """
+ # No need to save results dir in case of vstart_runner.py.
+ for x in ('LocalFuseMount', 'LocalKernelMount'):
+ if x in self.mount_a.__class__.__name__:
+ return
+
+ src = join(self.xfstests_repo_path, self.RESULTS_DIR)
+
+ if self.mount_a.run_shell(f'sudo stat {src}',
+ check_status=False, omit_sudo=False).returncode != 0:
+ log.info(f'xfstests-dev repo contains no directory named '
+ f'"{self.RESULTS_DIR}"; repo location: {self.xfstests_repo_path}')
+ return
+
+ std_loc = '/var/log/ceph' # standard location
+ dst = join(std_loc, 'xfstests-dev-results')
+ self.mount_a.run_shell(f'sudo mkdir -p {dst}', omit_sudo=False)
+ self.mount_a.run_shell(f'sudo cp -r {src} {dst}', omit_sudo=False)
+ log.info(f'results dir from xfstests-dev has been saved; it was '
+ f'copied from {self.xfstests_repo_path} to {std_loc}.')
+
+ def build_and_install(self):
+ # NOTE: On teuthology machines it's necessary to run "make" as
+ # superuser since the repo is cloned somewhere in /tmp.
+ self.mount_a.client_remote.run(args=['sudo', 'make'],
+ cwd=self.xfstests_repo_path, stdout=StringIO(),
+ stderr=StringIO())
+ self.mount_a.client_remote.run(args=['sudo', 'make', 'install'],
+ cwd=self.xfstests_repo_path, omit_sudo=False,
+ stdout=StringIO(), stderr=StringIO())
+
+ if self.install_xfsprogs:
+ self.mount_a.client_remote.run(args=['sudo', 'make'],
+ cwd=self.xfsprogs_repo_path,
+ stdout=StringIO(), stderr=StringIO())
+ self.mount_a.client_remote.run(args=['sudo', 'make', 'install'],
+ cwd=self.xfsprogs_repo_path, omit_sudo=False,
+ stdout=StringIO(), stderr=StringIO())
+
+ def get_repos(self):
+ """
+ Clone the xfstests-dev and xfsprogs-dev repositories. If already present,
+ update them. The xfsprogs-dev repo is used for testing encryption.
+ """
+ # TODO: make sure that the repo is not cloned for every test. It should
+ # happen only once.
+ remoteurl = 'https://git.ceph.com/xfstests-dev.git'
+ self.xfstests_repo_path = self.mount_a.client_remote.mkdtemp(suffix=
+ 'xfstests-dev')
+ self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1',
+ self.xfstests_repo_path])
+
+ if self.install_xfsprogs:
+ remoteurl = 'https://git.ceph.com/xfsprogs-dev.git'
+ self.xfsprogs_repo_path = self.mount_a.client_remote.mkdtemp(suffix=
+ 'xfsprogs-dev')
+ self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1',
+ self.xfsprogs_repo_path])
+
+ def get_admin_key(self):
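+ """Return the client.admin secret key, parsed from 'ceph auth get-or-create'."""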
+ import configparser
+
+ cp = configparser.ConfigParser()
+ cp.read_string(self.fs.mon_manager.raw_cluster_cmd(
+ 'auth', 'get-or-create', 'client.admin'))
+
+ return cp['client.admin']['key']
+
+ def get_test_and_scratch_dirs_ready(self):
+ """The "test" and "scratch" directories are directories inside CephFS,
+ while the test and scratch mounts are paths on the local FS where those
+ directories will be mounted. See the local.config templates generated in
+ this file for context.
+ """
+ self.test_dirname = 'test'
+ self.mount_a.run_shell(['mkdir', self.test_dirname])
+ # read var name as "test dir's mount path"
+ self.test_dirs_mount_path = self.mount_a.client_remote.mkdtemp(
+ suffix=self.test_dirname)
+
+ self.scratch_dirname = 'scratch'
+ self.mount_a.run_shell(['mkdir', self.scratch_dirname])
+ # read var name as "scratch dir's mount path"
+ self.scratch_dirs_mount_path = self.mount_a.client_remote.mkdtemp(
+ suffix=self.scratch_dirname)
+
+ def install_deps(self):
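+ # Install the packages xfstests-dev (and optionally xfsprogs) needs to
+ # build and run, using yum or apt depending on the detected distro.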
+ from teuthology.misc import get_system_type
+
+ distro, version = get_system_type(self.mount_a.client_remote,
+ distro=True, version=True)
+ distro = distro.lower()
+ major_ver_num = int(version.split('.')[0]) # keep only the major release number
+ log.info(f'distro and version detected is "{distro}" and "{version}".')
+
+ # we keep fedora here so that right deps are installed when this test
+ # is run locally by a dev.
+ if distro in ('redhatenterpriseserver', 'redhatenterprise', 'fedora',
+ 'centos', 'centosstream', 'rhel'):
+ deps = """acl attr automake bc dbench dump e2fsprogs fio \
+ gawk gcc indent libtool lvm2 make psmisc quota sed \
+ xfsdump xfsprogs \
+ libacl-devel libattr-devel libaio-devel libuuid-devel \
+ xfsprogs-devel btrfs-progs-devel python3 sqlite""".split()
+
+ if self.install_xfsprogs:
+ if distro == 'centosstream' and major_ver_num == 8:
+ deps += ['--enablerepo=powertools']
+ deps += ['inih-devel', 'userspace-rcu-devel', 'libblkid-devel',
+ 'gettext', 'libedit-devel', 'libattr-devel',
+ 'device-mapper-devel', 'libicu-devel']
+
+ deps_old_distros = ['xfsprogs-qa-devel']
+
+ if distro != 'fedora' and major_ver_num > 7:
+ deps.remove('btrfs-progs-devel')
+
+ args = ['sudo', 'yum', 'install', '-y'] + deps + deps_old_distros
+ elif distro == 'ubuntu':
+ deps = """xfslibs-dev uuid-dev libtool-bin \
+ e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \
+ libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \
+ uuid-runtime python sqlite3""".split()
+
+ if self.install_xfsprogs:
+ deps += ['libinih-dev', 'liburcu-dev', 'libblkid-dev',
+ 'gettext', 'libedit-dev', 'libattr1-dev',
+ 'libdevmapper-dev', 'libicu-dev', 'pkg-config']
+
+ if major_ver_num >= 19:
+ deps[deps.index('python')] = 'python2'
+ args = ['sudo', 'apt-get', 'install', '-y'] + deps
+ else:
+ raise RuntimeError('expected a yum-based or an apt-based system')
+
+ self.mount_a.client_remote.run(args=args, omit_sudo=False)
+
+ def create_reqd_users(self):
+ self.mount_a.client_remote.run(args=['sudo', 'useradd', '-m', 'fsgqa'],
+ omit_sudo=False, check_status=False)
+ self.mount_a.client_remote.run(args=['sudo', 'groupadd', 'fsgqa'],
+ omit_sudo=False, check_status=False)
+ self.mount_a.client_remote.run(args=['sudo', 'useradd', 'fsgqa2'],
+ omit_sudo=False, check_status=False)
+ self.mount_a.client_remote.run(args=['sudo', 'useradd',
+ '123456-fsgqa'], omit_sudo=False,
+ check_status=False)
+
+ def write_local_config(self, options=None):
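+ # Generate a local.config suited to the mount type in use (kernel or
+ # FUSE) and drop it at the root of the xfstests-dev checkout.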
+ if isinstance(self.mount_a, KernelMount):
+ conf_contents = self._gen_conf_for_kernel_mnt(options)
+ elif isinstance(self.mount_a, FuseMount):
+ conf_contents = self._gen_conf_for_fuse_mnt(options)
+
+ self.mount_a.client_remote.write_file(join(self.xfstests_repo_path,
+ 'local.config'),
+ conf_contents, sudo=True)
+ log.info(f'local.config\'s contents -\n{conf_contents}')
+
+ def _gen_conf_for_kernel_mnt(self, options=None):
+ """
+ Generate local.config for CephFS kernel client.
+ """
+ _options = '' if not options else ',' + options
+ mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0]
+ test_dev = mon_sock + ':/' + self.test_dirname
+ scratch_dev = mon_sock + ':/' + self.scratch_dirname
+
+ return dedent(f'''\
+ export FSTYP=ceph
+ export TEST_DEV={test_dev}
+ export TEST_DIR={self.test_dirs_mount_path}
+ export SCRATCH_DEV={scratch_dev}
+ export SCRATCH_MNT={self.scratch_dirs_mount_path}
+ export CEPHFS_MOUNT_OPTIONS="-o name=admin,secret={self.get_admin_key()}{_options}"
+ ''')
+
+ def _gen_conf_for_fuse_mnt(self, options=None):
+ """
+ Generate local.config for CephFS FUSE client.
+ """
+ mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0]
+ test_dev = 'ceph-fuse'
+ scratch_dev = ''
+ # XXX: ceph_fuse_bin_path should not really be required, since the
+ # ceph-fuse binary ought to be present in one of the standard locations
+ # during teuthology tests. However, testing with vstart_runner.py would
+ # then fail because the ceph-fuse binary is not in a standard location
+ # during those sessions. Thus, this workaround.
+ ceph_fuse_bin_path = 'ceph-fuse' # bin expected to be in env
+ if 'LocalFuseMount' in str(type(self.mount_a)): # for vstart_runner.py runs
+ ceph_fuse_bin_path = join(os_getcwd(), 'bin', 'ceph-fuse')
+
+ keyring_path = self.mount_a.client_remote.mktemp(
+ data=self.fs.mon_manager.get_keyring('client.admin')+'\n')
+
+ lastline = (f'export CEPHFS_MOUNT_OPTIONS="-m {mon_sock} -k '
+ f'{keyring_path} --client_mountpoint /{self.test_dirname}')
+ lastline += f' -o {options}"' if options else '"'
+
+ return dedent(f'''\
+ export FSTYP=ceph-fuse
+ export CEPH_FUSE_BIN_PATH={ceph_fuse_bin_path}
+ export TEST_DEV={test_dev} # without this tests won't get started
+ export TEST_DIR={self.test_dirs_mount_path}
+ export SCRATCH_DEV={scratch_dev}
+ export SCRATCH_MNT={self.scratch_dirs_mount_path}
+ {lastline}
+ ''')
+
+ def write_ceph_exclude(self):
+ # These tests either fail or take so long that they make the run time
+ # out; skip them for now.
+ xfstests_exclude_contents = dedent('''\
+ {c}/001 {g}/003 {g}/020 {g}/075 {g}/317 {g}/538 {g}/531
+ ''').format(g="generic", c="ceph")
+
+ self.mount_a.client_remote.write_file(join(self.xfstests_repo_path, 'ceph.exclude'),
+ xfstests_exclude_contents, sudo=True)
diff --git a/qa/tasks/cephfs_mirror.py b/qa/tasks/cephfs_mirror.py
new file mode 100644
index 000000000..9602a5a7f
--- /dev/null
+++ b/qa/tasks/cephfs_mirror.py
@@ -0,0 +1,73 @@
+"""
+Task for running cephfs mirror daemons
+"""
+
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc
+from teuthology.exceptions import ConfigError
+from teuthology.task import Task
+from tasks.ceph_manager import get_valgrind_args
+from tasks.util import get_remote_for_role
+
+log = logging.getLogger(__name__)
+
+class CephFSMirror(Task):
+ def __init__(self, ctx, config):
+ super(CephFSMirror, self).__init__(ctx, config)
+ self.log = log
+
+ def setup(self):
+ super(CephFSMirror, self).setup()
+ try:
+ self.client = self.config['client']
+ except KeyError:
+ raise ConfigError('cephfs-mirror requires a client to connect')
+
+ self.cluster_name, type_, self.client_id = misc.split_role(self.client)
+ if not type_ == 'client':
+ raise ConfigError(f'client role {self.client} must be a client')
+ self.remote = get_remote_for_role(self.ctx, self.client)
+
+ def begin(self):
+ super(CephFSMirror, self).begin()
+ testdir = misc.get_testdir(self.ctx)
+
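+ # Run cephfs-mirror under the standard teuthology wrappers so ulimits
+ # are adjusted, coverage is collected and the daemon can be signalled
+ # cleanly via daemon-helper.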
+ args = [
+ 'adjust-ulimits',
+ 'ceph-coverage',
+ '{tdir}/archive/coverage'.format(tdir=testdir),
+ 'daemon-helper',
+ 'term',
+ ]
+
+ if 'valgrind' in self.config:
+ args = get_valgrind_args(
+ testdir, 'cephfs-mirror-{id}'.format(id=self.client),
+ args, self.config.get('valgrind'))
+
+ args.extend([
+ 'cephfs-mirror',
+ '--cluster',
+ self.cluster_name,
+ '--id',
+ self.client_id,
+ ])
+ if 'run_in_foreground' in self.config:
+ args.extend(['--foreground'])
+
+ self.ctx.daemons.add_daemon(
+ self.remote, 'cephfs-mirror', self.client,
+ args=args,
+ logger=self.log.getChild(self.client),
+ stdin=run.PIPE,
+ wait=False,
+ )
+
+ def end(self):
+ mirror_daemon = self.ctx.daemons.get_daemon('cephfs-mirror', self.client)
+ mirror_daemon.stop()
+ super(CephFSMirror, self).end()
+
+task = CephFSMirror
diff --git a/qa/tasks/cephfs_mirror_thrash.py b/qa/tasks/cephfs_mirror_thrash.py
new file mode 100644
index 000000000..91f60ac50
--- /dev/null
+++ b/qa/tasks/cephfs_mirror_thrash.py
@@ -0,0 +1,219 @@
+"""
+Task for thrashing cephfs-mirror daemons
+"""
+
+import contextlib
+import logging
+import random
+import signal
+import socket
+import time
+
+from gevent import sleep
+from gevent.greenlet import Greenlet
+from gevent.event import Event
+
+from teuthology.exceptions import CommandFailedError
+from teuthology.orchestra import run
+from tasks.thrasher import Thrasher
+
+log = logging.getLogger(__name__)
+
+
+class CephFSMirrorThrasher(Thrasher, Greenlet):
+ """
+ CephFSMirrorThrasher::
+
+ The CephFSMirrorThrasher thrashes cephfs-mirror daemons during execution of other
+ tasks (workunits, etc).
+
+ The config is optional. Many of the config parameters are a maximum value
+ to use when selecting a random value from a range. The config is a dict
+ containing some or all of:
+
+ cluster: [default: ceph] cluster to thrash
+
+ max_thrash: [default: 1] the maximum number of active cephfs-mirror daemons per
+ cluster will be thrashed at any given time.
+
+ min_thrash_delay: [default: 5] minimum number of seconds to delay before
+ thrashing again.
+
+ max_thrash_delay: [default: 10] maximum number of seconds to delay before
+ thrashing again.
+
+ max_revive_delay: [default: 15] maximum number of seconds to delay before
+ bringing back a thrashed cephfs-mirror daemon.
+
+ randomize: [default: true] enables randomization and uses the max/min values
+
+ seed: [no default] seed the random number generator
+
+ Examples::
+
+ The following example disables randomization, and uses the max delay
+ values:
+
+ tasks:
+ - ceph:
+ - cephfs_mirror_thrash:
+ randomize: False
+ max_thrash_delay: 10
+ """
+
+ def __init__(self, ctx, config, cluster, daemons):
+ super(CephFSMirrorThrasher, self).__init__()
+
+ self.ctx = ctx
+ self.config = config
+ self.cluster = cluster
+ self.daemons = daemons
+
+ self.logger = log
+ self.name = 'thrasher.cephfs_mirror.[{cluster}]'.format(cluster = cluster)
+ self.stopping = Event()
+
+ self.randomize = bool(self.config.get('randomize', True))
+ self.max_thrash = int(self.config.get('max_thrash', 1))
+ self.min_thrash_delay = float(self.config.get('min_thrash_delay', 5.0))
+ self.max_thrash_delay = float(self.config.get('max_thrash_delay', 10))
+ self.max_revive_delay = float(self.config.get('max_revive_delay', 15.0))
+
+ def _run(self):
+ try:
+ self.do_thrash()
+ except Exception as e:
+ # See _run exception comment for MDSThrasher
+ self.set_thrasher_exception(e)
+ self.logger.exception("exception:")
+ # Allow successful completion so gevent doesn't see an exception.
+ # The DaemonWatchdog will observe the error and tear down the test.
+
+ def log(self, x):
+ """Write data to logger assigned to this CephFSMirrorThrasher"""
+ self.logger.info(x)
+
+ def stop(self):
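+ """Ask the thrasher loop to exit at its next check of the stop event."""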
+ self.stopping.set()
+
+ def do_thrash(self):
+ """
+ Perform the random thrashing action
+ """
+
+ self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster))
+ stats = {
+ "kill": 0,
+ }
+
+ while not self.stopping.is_set():
+ delay = self.max_thrash_delay
+ if self.randomize:
+ delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay)
+
+ if delay > 0.0:
+ self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
+ self.stopping.wait(delay)
+ if self.stopping.is_set():
+ continue
+
+ killed_daemons = []
+
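+ # Each daemon is killed with probability 1/len(daemons), so on average
+ # one daemon is thrashed per iteration, capped by max_thrash.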
+ weight = 1.0 / len(self.daemons)
+ count = 0
+ for daemon in self.daemons:
+ skip = random.uniform(0.0, 1.0)
+ if weight <= skip:
+ self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format(
+ label=daemon.id_, skip=skip, weight=weight))
+ continue
+
+ self.log('kill {label}'.format(label=daemon.id_))
+ try:
+ daemon.signal(signal.SIGTERM)
+ except Exception as e:
+ self.log(f'exception when stopping mirror daemon: {e}')
+ else:
+ killed_daemons.append(daemon)
+ stats['kill'] += 1
+
+ # if we've reached max_thrash, we're done
+ count += 1
+ if count >= self.max_thrash:
+ break
+
+ if killed_daemons:
+ # wait for a while before restarting
+ delay = self.max_revive_delay
+ if self.randomize:
+ delay = random.randrange(0.0, self.max_revive_delay)
+
+ self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay))
+ sleep(delay)
+
+ for daemon in killed_daemons:
+ self.log('waiting for {label}'.format(label=daemon.id_))
+ try:
+ run.wait([daemon.proc], timeout=600)
+ except CommandFailedError:
+ pass
+ except:
+ self.log('Failed to stop {label}'.format(label=daemon.id_))
+
+ try:
+ # try to capture a core dump
+ daemon.signal(signal.SIGABRT)
+ except socket.error:
+ pass
+ raise
+ finally:
+ daemon.reset()
+
+ for daemon in killed_daemons:
+ self.log('reviving {label}'.format(label=daemon.id_))
+ daemon.start()
+
+ for stat in stats:
+ self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat]))
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Stress test the cephfs-mirror by thrashing while another task/workunit
+ is running.
+
+ Please refer to CephFSMirrorThrasher class for further information on the
+ available options.
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'cephfs_mirror_thrash task only accepts a dict for configuration'
+
+ cluster = config.get('cluster', 'ceph')
+ daemons = list(ctx.daemons.iter_daemons_of_role('cephfs-mirror', cluster))
+ assert len(daemons) > 0, \
+ 'cephfs_mirror_thrash task requires at least 1 cephfs-mirror daemon'
+
+ # choose random seed
+ if 'seed' in config:
+ seed = int(config['seed'])
+ else:
+ seed = int(time.time())
+ log.info('cephfs_mirror_thrash using random seed: {seed}'.format(seed=seed))
+ random.seed(seed)
+
+ thrasher = CephFSMirrorThrasher(ctx, config, cluster, daemons)
+ thrasher.start()
+ ctx.ceph[cluster].thrashers.append(thrasher)
+
+ try:
+ log.debug('Yielding')
+ yield
+ finally:
+ log.info('joining cephfs_mirror_thrash')
+ thrasher.stop()
+ if thrasher.exception is not None:
+ raise RuntimeError('error during thrashing')
+ thrasher.join()
+ log.info('done joining')
diff --git a/qa/tasks/cephfs_test_runner.py b/qa/tasks/cephfs_test_runner.py
new file mode 100644
index 000000000..8a4919b93
--- /dev/null
+++ b/qa/tasks/cephfs_test_runner.py
@@ -0,0 +1,213 @@
+import contextlib
+import logging
+import os
+import unittest
+from unittest import suite, loader, case
+from teuthology.task import interactive
+from teuthology import misc
+from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster
+from tasks.mgr.mgr_test_case import MgrCluster
+
+log = logging.getLogger(__name__)
+
+
+class DecoratingLoader(loader.TestLoader):
+ """
+ A specialization of TestLoader that tags some extra attributes
+ onto test classes as they are loaded.
+ """
+ def __init__(self, params):
+ self._params = params
+ super(DecoratingLoader, self).__init__()
+
+ def _apply_params(self, obj):
+ for k, v in self._params.items():
+ if obj.__class__ is type:
+ cls = obj
+ else:
+ cls = obj.__class__
+ setattr(cls, k, v)
+
+ def loadTestsFromTestCase(self, testCaseClass):
+ self._apply_params(testCaseClass)
+ return super(DecoratingLoader, self).loadTestsFromTestCase(testCaseClass)
+
+ def loadTestsFromName(self, name, module=None):
+ result = super(DecoratingLoader, self).loadTestsFromName(name, module)
+
+ # Special case: when we were called with the name of a method, we get
+ # a suite containing a single TestCase
+ tests_in_result = list(result)
+ if len(tests_in_result) == 1 and isinstance(tests_in_result[0], case.TestCase):
+ self._apply_params(tests_in_result[0])
+
+ return result
+
+
+class LogStream(object):
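+ """File-like object that buffers writes and emits complete lines via log.info()."""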
+ def __init__(self):
+ self.buffer = ""
+
+ def write(self, data):
+ self.buffer += data
+ if "\n" in self.buffer:
+ lines = self.buffer.split("\n")
+ for line in lines[:-1]:
+ log.info(line)
+ self.buffer = lines[-1]
+
+ def flush(self):
+ pass
+
+
+class InteractiveFailureResult(unittest.TextTestResult):
+ """
+ Specialization that implements interactive-on-error style
+ behavior.
+ """
+ ctx = None
+
+ def addFailure(self, test, err):
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Failure in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=self.ctx, config=None)
+
+ def addError(self, test, err):
+ log.error(self._exc_info_to_string(err, test))
+ log.error("Error in test '{0}', going interactive".format(
+ self.getDescription(test)
+ ))
+ interactive.task(ctx=self.ctx, config=None)
+
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ Run the CephFS test cases.
+
+ Run everything in tasks/cephfs/test_*.py:
+
+ ::
+
+ tasks:
+ - install:
+ - ceph:
+ - ceph-fuse:
+ - cephfs_test_runner:
+
+ `modules` argument allows running only some specific modules:
+
+ ::
+
+ tasks:
+ ...
+ - cephfs_test_runner:
+ modules:
+ - tasks.cephfs.test_sessionmap
+ - tasks.cephfs.test_auto_repair
+
+ By default, any cases that can't be run on the current cluster configuration
+ will generate a failure. When the optional `fail_on_skip` argument is set
+ to false, any tests that can't be run on the current configuration will
+ simply be skipped:
+
+ ::
+
+ tasks:
+ ...
+ - cephfs_test_runner:
+ fail_on_skip: false
+
+ """
+
+ ceph_cluster = CephCluster(ctx)
+
+ if len(list(misc.all_roles_of_type(ctx.cluster, 'mds'))):
+ mds_cluster = MDSCluster(ctx)
+ fs = Filesystem(ctx)
+ else:
+ mds_cluster = None
+ fs = None
+
+ if len(list(misc.all_roles_of_type(ctx.cluster, 'mgr'))):
+ mgr_cluster = MgrCluster(ctx)
+ else:
+ mgr_cluster = None
+
+ # Mount objects, sorted by ID
+ if hasattr(ctx, 'mounts'):
+ mounts = [v for k, v in sorted(ctx.mounts.items(), key=lambda mount: mount[0])]
+ else:
+ # The test configuration has a filesystem but no fuse/kclient mounts
+ mounts = []
+
+ decorating_loader = DecoratingLoader({
+ "ctx": ctx,
+ "mounts": mounts,
+ "fs": fs,
+ "ceph_cluster": ceph_cluster,
+ "mds_cluster": mds_cluster,
+ "mgr_cluster": mgr_cluster,
+ })
+
+ fail_on_skip = config.get('fail_on_skip', True)
+
+ # Put useful things onto ctx for interactive debugging
+ ctx.fs = fs
+ ctx.mds_cluster = mds_cluster
+ ctx.mgr_cluster = mgr_cluster
+
+ # Depending on config, either load specific modules, or scan for modules
+ if config and 'modules' in config and config['modules']:
+ module_suites = []
+ for mod_name in config['modules']:
+ # Test names like cephfs.test_auto_repair
+ module_suites.append(decorating_loader.loadTestsFromName(mod_name))
+ overall_suite = suite.TestSuite(module_suites)
+ else:
+ # Default, run all tests
+ overall_suite = decorating_loader.discover(
+ os.path.join(
+ os.path.dirname(os.path.abspath(__file__)),
+ "cephfs/"
+ )
+ )
+
+ if ctx.config.get("interactive-on-error", False):
+ InteractiveFailureResult.ctx = ctx
+ result_class = InteractiveFailureResult
+ else:
+ result_class = unittest.TextTestResult
+
+ class LoggingResult(result_class):
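+ # Log each test as it starts and, when fail_on_skip is set, record
+ # skipped tests as failures so they don't pass silently.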
+ def startTest(self, test):
+ log.info("Starting test: {0}".format(self.getDescription(test)))
+ return super(LoggingResult, self).startTest(test)
+
+ def addSkip(self, test, reason):
+ if fail_on_skip:
+ # Don't just call addFailure because that requires a traceback
+ self.failures.append((test, reason))
+ else:
+ super(LoggingResult, self).addSkip(test, reason)
+
+ # Execute!
+ result = unittest.TextTestRunner(
+ stream=LogStream(),
+ resultclass=LoggingResult,
+ verbosity=2,
+ failfast=True).run(overall_suite)
+
+ if not result.wasSuccessful():
+ result.printErrors() # duplicate output at end for convenience
+
+ bad_tests = []
+ for test, error in result.errors:
+ bad_tests.append(str(test))
+ for test, failure in result.failures:
+ bad_tests.append(str(test))
+
+ raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests)))
+
+ yield
diff --git a/qa/tasks/cephfs_upgrade_snap.py b/qa/tasks/cephfs_upgrade_snap.py
new file mode 100644
index 000000000..1b0a737a7
--- /dev/null
+++ b/qa/tasks/cephfs_upgrade_snap.py
@@ -0,0 +1,47 @@
+"""
+Upgrade cluster snap format.
+"""
+
+import logging
+import time
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Upgrade CephFS file system snap format.
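+
+ A minimal invocation, following the conventions of the other tasks in
+ this tree, might look like::
+
+ tasks:
+ - install:
+ - ceph:
+ - cephfs_upgrade_snap: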
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'snap-upgrade task only accepts a dict for configuration'
+
+ fs = Filesystem(ctx)
+
+ mds_map = fs.get_mds_map()
+ assert(mds_map['max_mds'] == 1)
+
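+ # Run a repairing recursive scrub on / and ~mdsdir before checking the
+ # snapshot-related mdsmap flags below.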
+ json = fs.run_scrub(["start", "/", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True)
+ log.info("scrub / completed")
+ else:
+ log.info("scrub / failed: {}".format(json))
+
+ json = fs.run_scrub(["start", "~mdsdir", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True)
+ log.info("scrub ~mdsdir completed")
+ else:
+ log.info("scrub ~mdsdir failed: {}".format(json))
+
+ for i in range(0, 10):
+ mds_map = fs.get_mds_map()
+ if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0:
+ break
+ time.sleep(10)
+ assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS
+ assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS