diff options
Diffstat (limited to 'qa/tasks/cephfs/test_exports.py')
-rw-r--r-- | qa/tasks/cephfs/test_exports.py | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py new file mode 100644 index 000000000..d2421bedc --- /dev/null +++ b/qa/tasks/cephfs/test_exports.py @@ -0,0 +1,519 @@ +import logging +import random +import time +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra.run import CommandFailedError + +log = logging.getLogger(__name__) + +class TestExports(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def test_session_race(self): + """ + Test session creation race. + + See: https://tracker.ceph.com/issues/24072#change-113056 + """ + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + rank1 = self.fs.get_rank(rank=1, status=status) + + # Create a directory that is pre-exported to rank 1 + self.mount_a.run_shell(["mkdir", "-p", "a/aa"]) + self.mount_a.setfattr("a", "ceph.dir.pin", "1") + self._wait_subtrees([('/a', 1)], status=status, rank=1) + + # Now set the mds config to allow the race + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1) + + # Now create another directory and try to export it + self.mount_b.run_shell(["mkdir", "-p", "b/bb"]) + self.mount_b.setfattr("b", "ceph.dir.pin", "1") + + time.sleep(5) + + # Now turn off the race so that it doesn't wait again + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1) + + # Now try to create a session with rank 1 by accessing a dir known to + # be there, if buggy, this should cause the rank 1 to crash: + self.mount_b.run_shell(["ls", "a"]) + + # Check if rank1 changed (standby tookover?) + new_rank1 = self.fs.get_rank(rank=1) + self.assertEqual(rank1['gid'], new_rank1['gid']) + +class TestExportPin(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p 1/2/3/4") + + def test_noop(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_negative(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-2341") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_empty_pin(self): + self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_trivial(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_export_targets(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.status = self.fs.status() + r0 = self.status.get_rank(self.fs.id, 0) + self.assertTrue(sorted(r0['export_targets']) == [1]) + + def test_redundant(self): + # redundant pin /1/2 to rank 1 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1) + + def test_reassignment(self): + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0) + + def test_phantom_rank(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "10") + time.sleep(30) # wait for nothing weird to happen + self._wait_subtrees([('/1', 0)], status=self.status) + + def test_nested(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2) + + def test_nested_unset(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_rename(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.run_shell_payload("mkdir -p 9/8/7") + self.mount_a.setfattr("9/8", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0) + self.mount_a.run_shell_payload("mv 9/8 1/2") + self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0) + + def test_getfattr(self): + # pin /1 to rank 0 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1) + + if not isinstance(self.mount_a, FuseMount): + p = self.mount_a.client_remote.sh('uname -r', wait=True) + dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin") + log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin) + if str(p) < "5" and not(dir_pin): + self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin") + self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1') + self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0') + + def test_export_pin_cache_drop(self): + """ + That the export pin does not prevent empty (nothing in cache) subtree merging. + """ + + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status) + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + # drop cache multiple times to clear replica pins + self._wait_subtrees([], status=self.status, action=_drop) + +class TestEphemeralPins(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.config_set('mds', 'mds_export_ephemeral_random', True) + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0) + + self.mount_a.run_shell_payload(""" +set -e + +# Use up a random number of inode numbers so the ephemeral pinning is not the same every test. +mkdir .inode_number_thrash +count=$((RANDOM % 1024)) +for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done +rm -rf .inode_number_thrash +""") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True): + return self.mount_a.run_shell_payload(f""" +set -ex +mkdir -p {path} +{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""} +{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""} +{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""} +for ((i = 0; i < {count}; i++)); do + mkdir -p "{path}/$i" + echo file > "{path}/$i/file" +done +""", wait=wait) + + def test_ephemeral_pin_dist_override(self): + """ + That an ephemeral distributed pin overrides a normal export pin. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertTrue(s['distributed_ephemeral_pin']) + + def test_ephemeral_pin_dist_override_pin(self): + """ + That an export pin overrides an ephemerally pinned directory. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertEqual(s['auth_first'], 0) + self.assertFalse(s['distributed_ephemeral_pin']) + # it has been merged into /tree + + def test_ephemeral_pin_dist_off(self): + """ + That turning off ephemeral distributed pin merges subtrees. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + + def test_ephemeral_pin_dist_conf_off(self): + """ + That turning off ephemeral distributed pin config prevents distribution. + """ + + self._setup_tree() + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + def _test_ephemeral_pin_dist_conf_off_merge(self): + """ + That turning off ephemeral distributed pin config merges subtrees. + FIXME: who triggers the merge? + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status) + + def test_ephemeral_pin_dist_override_before(self): + """ + That a conventional export pin overrides the distributed policy _before_ distributed policy is set. + """ + + count = 10 + self._setup_tree(count=count) + test = [] + for i in range(count): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) # for something to not happen... + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_override_after(self): + """ + That a conventional export pin overrides the distributed policy _after_ distributed policy is set. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + test = [] + for i in range(10): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_failover(self): + """ + That MDS failover does not cause unnecessary migrations. + """ + + # pin /tree so it does not export during failover + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + #test = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {before}") + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + time.sleep(10) # waiting for something to not happen + after = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {after}") + self.assertEqual(before, after) + + def test_ephemeral_pin_distribution(self): + """ + That ephemerally pinned subtrees are somewhat evenly distributed. + """ + + max_mds = 3 + frags = 128 + + self.fs.set_max_mds(max_mds) + self.status = self.fs.wait_for_daemons() + + self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds) + self._setup_tree(count=1000, distributed=True) + + subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all") + nsubtrees = len(subtrees) + + # Check if distribution is uniform + rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees)) + rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees)) + rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees)) + self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15) + + + def test_ephemeral_random(self): + """ + That 100% randomness causes all children to be pinned. + """ + self._setup_tree(random=1.0) + self._wait_random_subtrees(100, status=self.status, rank="all") + + def test_ephemeral_random_max(self): + """ + That the config mds_export_ephemeral_random_max is not exceeded. + """ + + r = 0.5 + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + self._setup_tree(path="tree/new", count=count) + time.sleep(30) # for something not to happen... + subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/") + self.assertLessEqual(len(subtrees), int(.01*count*1.25)) + + def test_ephemeral_random_max_config(self): + """ + That the config mds_export_ephemeral_random_max config rejects new OOB policies. + """ + + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + try: + p = self._setup_tree(count=1, random=0.02, wait=False) + p.wait() + except CommandFailedError as e: + log.info(f"{e}") + self.assertIn("Invalid", p.stderr.getvalue()) + else: + raise RuntimeError("mds_export_ephemeral_random_max ignored!") + + def test_ephemeral_random_dist(self): + """ + That ephemeral distributed pin overrides ephemeral random pin + """ + + self._setup_tree(random=1.0, distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status) + + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path.startswith('/tree'): + self.assertFalse(s['random_ephemeral_pin']) + + def test_ephemeral_random_pin_override_before(self): + """ + That a conventional export pin overrides the random policy before creating new directories. + """ + + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=10, export=1) + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_random_pin_override_after(self): + """ + That a conventional export pin overrides the random policy after creating new directories. + """ + + count = 10 + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=count) + self._wait_random_subtrees(count+1, status=self.status, rank="all") + self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1") + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_randomness(self): + """ + That the randomness is reasonable. + """ + + r = random.uniform(0.25, 0.75) # ratios don't work for small r! + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + time.sleep(30) # for max to not be exceeded + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + self.assertLessEqual(len(subtrees), int(r*count*1.50)) + + def test_ephemeral_random_cache_drop(self): + """ + That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging. + """ + + count = 100 + self._setup_tree(count=count, random=1.0) + self._wait_random_subtrees(count, status=self.status, rank="all") + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + self._wait_subtrees([], status=self.status, action=_drop) + + def test_ephemeral_random_failover(self): + """ + That the random ephemeral pins stay pinned across MDS failover. + """ + + count = 100 + r = 0.5 + self._setup_tree(count=count, random=r) + # wait for all random subtrees to be created, not a specific count + time.sleep(30) + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + before = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before.sort(); + + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + + time.sleep(30) # waiting for something to not happen + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + after = [(s['dir']['path'], s['auth_first']) for s in subtrees] + after.sort(); + log.info(f"subtrees before: {before}") + log.info(f"subtrees after: {after}") + + self.assertEqual(before, after) + + def test_ephemeral_pin_grow_mds(self): + """ + That consistent hashing works to reduce the number of migrations. + """ + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + # Sleeping for a while to allow the ephemeral pin migrations to complete + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # ~50% of subtrees from the two rank will migrate to another rank + self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget + + def test_ephemeral_pin_shrink_mds(self): + """ + That consistent hashing works to reduce the number of migrations. + """ + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 + self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget |