summaryrefslogtreecommitdiffstats
path: root/qa/tasks/rbd_pwl_cache_recovery.py
diff options
context:
space:
mode:
Diffstat (limited to 'qa/tasks/rbd_pwl_cache_recovery.py')
-rw-r--r--qa/tasks/rbd_pwl_cache_recovery.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/qa/tasks/rbd_pwl_cache_recovery.py b/qa/tasks/rbd_pwl_cache_recovery.py
new file mode 100644
index 000000000..e13c1f664
--- /dev/null
+++ b/qa/tasks/rbd_pwl_cache_recovery.py
@@ -0,0 +1,96 @@
+"""
+persistent write log cache recovery task
+"""
+import contextlib
+import logging
+import random
+import json
+import time
+
+from teuthology import misc as teuthology
+from teuthology import contextutil
+
+DEFAULT_NUM_ITERATIONS = 20
+IO_PATTERNS = ("full-seq", "rand")
+IO_SIZES = ('4K', '16K', '128K', '1024K')
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def thrashes_rbd_bench_on_persistent_cache(ctx, config):
+ """
+ thrashes rbd bench on persistent write log cache.
+ It can test recovery feature of persistent write log cache.
+ """
+ log.info("thrashes rbd bench on persistent write log cache")
+
+ client, client_config = list(config.items())[0]
+ (remote,) = ctx.cluster.only(client).remotes.keys()
+ client_config = client_config if client_config is not None else dict()
+ image_name = client_config.get('image_name', 'testimage')
+ num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS)
+
+ for i in range(num_iterations):
+ log.info("start rbd bench")
+ # rbd bench could not specify the run time so set a large enough test size.
+ remote.run(
+ args=[
+ 'rbd', 'bench',
+ '--io-type', 'write',
+ '--io-pattern', random.choice(IO_PATTERNS),
+ '--io-size', random.choice(IO_SIZES),
+ '--io-total', '100G',
+ image_name,
+ ],
+ wait=False,
+ )
+ # Wait a few seconds for the rbd bench process to run
+ # and complete the pwl cache initialization
+ time.sleep(10)
+ log.info("dump cache state when rbd bench running.")
+ remote.sh(['rbd', 'status', image_name, '--format=json'])
+ log.info("sleep...")
+ time.sleep(random.randint(10, 60))
+ log.info("rbd bench crash.")
+ remote.run(
+ args=[
+ 'killall', '-9', 'rbd',
+ ],
+ check_status=False,
+ )
+ log.info("wait for watch timeout.")
+ time.sleep(40)
+ log.info("check cache state after crash.")
+ out = remote.sh(['rbd', 'status', image_name, '--format=json'])
+ rbd_status = json.loads(out)
+ assert len(rbd_status['watchers']) == 0
+ assert rbd_status['persistent_cache']['present'] == True
+ assert rbd_status['persistent_cache']['empty'] == False
+ assert rbd_status['persistent_cache']['clean'] == False
+ log.info("check dirty cache file.")
+ remote.run(
+ args=[
+ 'test', '-e', rbd_status['persistent_cache']['path'],
+ ]
+ )
+ try:
+ yield
+ finally:
+ log.info("cleanup")
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ This is task for testing persistent write log cache recovery.
+ """
+ assert isinstance(config, dict), \
+ "task rbd_pwl_cache_recovery only supports a dictionary for configuration"
+
+ managers = []
+ config = teuthology.replace_all_with_clients(ctx.cluster, config)
+ managers.append(
+ lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config)
+ )
+
+ with contextutil.nested(*managers):
+ yield