Diffstat (limited to 'qa/tasks/cephfs/test_forward_scrub.py')
-rw-r--r--  qa/tasks/cephfs/test_forward_scrub.py | 208
1 file changed, 208 insertions(+), 0 deletions(-)
diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py
index f3cec881b..334a73e1c 100644
--- a/qa/tasks/cephfs/test_forward_scrub.py
+++ b/qa/tasks/cephfs/test_forward_scrub.py
@@ -9,6 +9,7 @@ how the functionality responds to damaged metadata.
"""
import logging
import json
+import errno
from collections import namedtuple
from io import BytesIO
@@ -46,6 +47,9 @@ class TestForwardScrub(CephFSTestCase):
return inos
+ def _is_MDS_damage(self):
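+        # True when the cluster-wide MDS_DAMAGE health check is currently raised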
+ return "MDS_DAMAGE" in self.mds_cluster.mon_manager.get_mon_health()['checks']
+
def test_apply_tag(self):
self.mount_a.run_shell(["mkdir", "parentdir"])
self.mount_a.run_shell(["mkdir", "parentdir/childdir"])
@@ -305,3 +309,207 @@ class TestForwardScrub(CephFSTestCase):
backtrace = self.fs.read_backtrace(file_ino)
self.assertEqual(['alpha', 'parent_a'],
[a['dname'] for a in backtrace['ancestors']])
+
+ def test_health_status_after_dentry_repair(self):
+ """
+ Test that the damage health status is cleared
+ after the damaged dentry is repaired
+ """
+        # Create a subdirectory with an undamaged file and one to be damaged
+ self.mount_a.run_shell(["mkdir", "subdir/"])
+
+ self.mount_a.run_shell(["touch", "subdir/file_undamaged"])
+ self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"])
+
+ subdir_ino = self.mount_a.path_to_ino("subdir")
+
+ self.mount_a.umount_wait()
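+        # Flush the journal so the new dentries reach the backing store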
+ for mds_name in self.fs.get_active_names():
+ self.fs.mds_asok(["flush", "journal"], mds_name)
+
+ self.fs.fail()
+
+ # Corrupt a dentry
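+        # The dirfrag object is named "<dir ino (hex)>.<frag id>" and each
+        # dentry is an omap key "<name>_head"; junk in the value makes the
+        # dentry undecodable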
+ junk = "deadbeef" * 10
+ dirfrag_obj = "{0:x}.00000000".format(subdir_ino)
+ self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk])
+
+ # Start up and try to list it
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+
+ self.mount_a.mount_wait()
+ dentries = self.mount_a.ls("subdir/")
+
+        # The damaged dentry should have disappeared from the listing
+ self.assertEqual(dentries, ["file_undamaged"])
+
+        # I should get ENOENT if I try to read it normally, because
+        # the dir is considered complete
+ try:
+ self.mount_a.stat("subdir/file_to_be_damaged", wait=True)
+ except CommandFailedError as e:
+ self.assertEqual(e.exitstatus, errno.ENOENT)
+ else:
+ raise AssertionError("Expected ENOENT")
+
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "2")
+
+ self.mount_a.umount_wait()
+
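+        # A recursive scrub without "repair" should detect the damage and
+        # record it in the MDS damage table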
+ out_json = self.fs.run_scrub(["start", "/subdir", "recursive"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ # Check that an entry for dentry damage is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertEqual(damage[0]['damage_type'], "dentry")
+ self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)
+
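+        # Re-running the scrub with "repair" should drop the unreadable
+        # dentry and fix the directory stats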
+ out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ # Check that the entry is cleared from the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 0)
+ self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)
+
+ self.mount_a.mount_wait()
+
+ # Check that the file count is now correct
+ nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files")
+ self.assertEqual(nfiles, "1")
+
+        # Clean up the corrupt omap entry
+        self.fs.radosm(["rmomapkey", dirfrag_obj, "file_to_be_damaged_head"])
+
+ def test_health_status_after_dirfrag_repair(self):
+ """
+ Test that the damage health status is cleared
+ after the damaged dirfrag is repaired
+ """
+ self.mount_a.run_shell(["mkdir", "dir"])
+ self.mount_a.run_shell(["touch", "dir/file"])
+ self.mount_a.run_shell(["mkdir", "testdir"])
+ self.mount_a.run_shell(["ln", "dir/file", "testdir/hardlink"])
+
+ dir_ino = self.mount_a.path_to_ino("dir")
+
+ # Ensure everything is written to backing store
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+
+ # Drop everything from the MDS cache
+ self.fs.fail()
+
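+        # Delete the directory's dirfrag object from the metadata pool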
+ self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)])
+
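+        # Reset the journal so replay does not reference the deleted dirfrag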
+ self.fs.journal_tool(['journal', 'reset'], 0)
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+
+        # Check that stat'ing the hardlink gives EIO
+        ran = self.mount_a.run_shell(["stat", "testdir/hardlink"], wait=False)
+        try:
+            ran.wait()
+        except CommandFailedError:
+            self.assertIn("Input/output error", ran.stderr.getvalue())
+        else:
+            raise AssertionError("Expected EIO")
+
+ out_json = self.fs.run_scrub(["start", "/dir", "recursive"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+        # Check that entries are created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 3)
+        damage_types = {d['damage_type'] for d in damage}
+ self.assertIn("dir_frag", damage_types)
+ self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)
+
+ out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+        # Check that the dir_frag entry is cleared from the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertNotEqual(damage[0]['damage_type'], "dir_frag")
+
+ self.mount_a.umount_wait()
+ self.fs.mds_asok(["flush", "journal"])
+ self.fs.fail()
+
+ # Run cephfs-data-scan
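+        # scan_extents/scan_inodes rebuild inode metadata from the data pool;
+        # scan_links repairs the hardlink's remote dentry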
+ self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
+ self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()])
+ self.fs.data_scan(["scan_links"])
+
+ self.fs.set_joinable()
+ self.fs.wait_for_daemons()
+ self.mount_a.mount_wait()
+
+ out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 0)
+ self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)
+
+ def test_health_status_after_backtrace_repair(self):
+ """
+ Test that the damage health status is cleared
+ after the damaged backtrace is repaired
+ """
+        # Create a directory and a file for checks
+ self.mount_a.run_shell(["mkdir", "dir_test"])
+ self.mount_a.run_shell(["touch", "dir_test/file"])
+ file_ino = self.mount_a.path_to_ino("dir_test/file")
+
+        # The backtrace and layout are written on the initial journal flush
+ self.fs.mds_asok(["flush", "journal"])
+ backtrace = self.fs.read_backtrace(file_ino)
+ self.assertEqual(['file', 'dir_test'],
+ [a['dname'] for a in backtrace['ancestors']])
+
+ # Corrupt the backtrace
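+        # (the backtrace lives in the "parent" xattr of the file's first
+        # data object)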
+ self.fs._write_data_xattr(file_ino, "parent",
+ "The backtrace is corrupted")
+
+ out_json = self.fs.run_scrub(["start", "/", "recursive"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ # Check that an entry for backtrace damage is created in the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 1)
+ self.assertEqual(damage[0]['damage_type'], "backtrace")
+ self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100)
+
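+        # "force" makes scrub revisit entries even if their scrub version
+        # is unchanged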
+ out_json = self.fs.run_scrub(["start", "/", "repair,recursive,force"])
+ self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True)
+
+ # Check that the entry is cleared from the damage table
+ damage = json.loads(
+ self.fs.mon_manager.raw_cluster_cmd(
+ 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+ "damage", "ls", '--format=json-pretty'))
+ self.assertEqual(len(damage), 0)
+ self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100)