summaryrefslogtreecommitdiffstats
path: root/qa/tasks/cephfs/test_recovery_pool.py
diff options
context:
space:
mode:
Diffstat (limited to 'qa/tasks/cephfs/test_recovery_pool.py')
-rw-r--r--qa/tasks/cephfs/test_recovery_pool.py203
1 files changed, 203 insertions, 0 deletions
diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py
new file mode 100644
index 000000000..9926b3670
--- /dev/null
+++ b/qa/tasks/cephfs/test_recovery_pool.py
@@ -0,0 +1,203 @@
+"""
+Test our tools for recovering metadata from the data pool into an alternate pool
+"""
+
+import logging
+import traceback
+from collections import namedtuple
+
+from teuthology.orchestra.run import CommandFailedError
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+
+log = logging.getLogger(__name__)
+
+
+ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
+
+
+class OverlayWorkload(object):
+ def __init__(self, orig_fs, recovery_fs, orig_mount, recovery_mount):
+ self._orig_fs = orig_fs
+ self._recovery_fs = recovery_fs
+ self._orig_mount = orig_mount
+ self._recovery_mount = recovery_mount
+ self._initial_state = None
+
+ # Accumulate backtraces for every failed validation, and return them. Backtraces
+ # are rather verbose, but we only see them when something breaks, and they
+ # let us see which check failed without having to decorate each check with
+ # a string
+ self._errors = []
+
+ def assert_equal(self, a, b):
+ try:
+ if a != b:
+ raise AssertionError("{0} != {1}".format(a, b))
+ except AssertionError as e:
+ self._errors.append(
+ ValidationError(e, traceback.format_exc(3))
+ )
+
+ def write(self):
+ """
+ Write the workload files to the mount
+ """
+ raise NotImplementedError()
+
+ def validate(self):
+ """
+ Read from the mount and validate that the workload files are present (i.e. have
+ survived or been reconstructed from the test scenario)
+ """
+ raise NotImplementedError()
+
+ def damage(self):
+ """
+ Damage the filesystem pools in ways that will be interesting to recover from. By
+ default just wipe everything in the metadata pool
+ """
+
+ pool = self._orig_fs.get_metadata_pool_name()
+ self._orig_fs.rados(["purge", pool, '--yes-i-really-really-mean-it'])
+
+ def flush(self):
+ """
+ Called after client unmount, after write: flush whatever you want
+ """
+ self._orig_fs.mds_asok(["flush", "journal"])
+ self._recovery_fs.mds_asok(["flush", "journal"])
+
+
+class SimpleOverlayWorkload(OverlayWorkload):
+ """
+ Single file, single directory, check that it gets recovered and so does its size
+ """
+ def write(self):
+ self._orig_mount.run_shell(["mkdir", "subdir"])
+ self._orig_mount.write_n_mb("subdir/sixmegs", 6)
+ self._initial_state = self._orig_mount.stat("subdir/sixmegs")
+
+ def validate(self):
+ self._recovery_mount.run_shell(["ls", "subdir"])
+ st = self._recovery_mount.stat("subdir/sixmegs")
+ self.assert_equal(st['st_size'], self._initial_state['st_size'])
+ return self._errors
+
+class TestRecoveryPool(CephFSTestCase):
+ MDSS_REQUIRED = 2
+ CLIENTS_REQUIRED = 2
+ REQUIRE_RECOVERY_FILESYSTEM = True
+
+ def is_marked_damaged(self, rank):
+ mds_map = self.fs.get_mds_map()
+ return rank in mds_map['damaged']
+
+ def _rebuild_metadata(self, workload, other_pool=None, workers=1):
+ """
+ That when all objects in metadata pool are removed, we can rebuild a metadata pool
+ based on the contents of a data pool, and a client can see and read our files.
+ """
+
+ # First, inject some files
+
+ workload.write()
+
+ # Unmount the client and flush the journal: the tool should also cope with
+ # situations where there is dirty metadata, but we'll test that separately
+ self.mount_a.umount_wait()
+ self.mount_b.umount_wait()
+ workload.flush()
+
+ # Create the alternate pool if requested
+ recovery_fs = self.recovery_fs.name
+ recovery_pool = self.recovery_fs.get_metadata_pool_name()
+ self.recovery_fs.data_scan(['init', '--force-init',
+ '--filesystem', recovery_fs,
+ '--alternate-pool', recovery_pool])
+ self.recovery_fs.mon_manager.raw_cluster_cmd('-s')
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "session"])
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "snap"])
+ self.recovery_fs.table_tool([recovery_fs + ":0", "reset", "inode"])
+
+ # Stop the MDS
+ self.fs.mds_stop() # otherwise MDS will join once the fs is reset
+ self.fs.fail()
+
+ # After recovery, we need the MDS to not be strict about stats (in production these options
+ # are off by default, but in QA we need to explicitly disable them)
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False)
+ self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False)
+
+ # Apply any data damage the workload wants
+ workload.damage()
+
+ # Reset the MDS map in case multiple ranks were in play: recovery procedure
+ # only understands how to rebuild metadata under rank 0
+ self.fs.reset()
+
+ self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
+ self.fs.table_tool([self.fs.name + ":0", "reset", "inode"])
+
+ # Run the recovery procedure
+ if False:
+ with self.assertRaises(CommandFailedError):
+ # Normal reset should fail when no objects are present, we'll use --force instead
+ self.fs.journal_tool(["journal", "reset"], 0)
+
+ self.fs.data_scan(['scan_extents', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ self.fs.get_data_pool_name()])
+ self.fs.data_scan(['scan_inodes', '--alternate-pool',
+ recovery_pool, '--filesystem', self.fs.name,
+ '--force-corrupt', '--force-init',
+ self.fs.get_data_pool_name()])
+ self.fs.journal_tool(['event', 'recover_dentries', 'list',
+ '--alternate-pool', recovery_pool], 0)
+
+ self.fs.data_scan(['init', '--force-init', '--filesystem',
+ self.fs.name])
+ self.fs.data_scan(['scan_inodes', '--filesystem', self.fs.name,
+ '--force-corrupt', '--force-init',
+ self.fs.get_data_pool_name()])
+ self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0)
+
+ self.recovery_fs.journal_tool(['journal', 'reset', '--force'], 0)
+ self.fs.journal_tool(['journal', 'reset', '--force'], 0)
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired',
+ recovery_fs + ":0")
+
+ # Mark the MDS repaired
+ self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+
+ # Start the MDS
+ self.fs.mds_restart()
+ self.fs.set_joinable()
+ self.recovery_fs.mds_restart()
+ self.fs.wait_for_daemons()
+ self.recovery_fs.wait_for_daemons()
+ status = self.recovery_fs.status()
+ for rank in self.recovery_fs.get_ranks(status=status):
+ self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'],
+ 'injectargs', '--debug-mds=20')
+ self.fs.rank_tell(['scrub', 'start', '/', 'recursive,repair'], rank=rank['rank'], status=status)
+ log.info(str(self.mds_cluster.status()))
+
+ # Mount a client
+ self.mount_a.mount_wait()
+ self.mount_b.mount_wait(cephfs_name=recovery_fs)
+
+ # See that the files are present and correct
+ errors = workload.validate()
+ if errors:
+ log.error("Validation errors found: {0}".format(len(errors)))
+ for e in errors:
+ log.error(e.exception)
+ log.error(e.backtrace)
+ raise AssertionError("Validation failed, first error: {0}\n{1}".format(
+ errors[0].exception, errors[0].backtrace
+ ))
+
+ def test_rebuild_simple(self):
+ self._rebuild_metadata(SimpleOverlayWorkload(self.fs, self.recovery_fs,
+ self.mount_a, self.mount_b))