 qa/tasks/thrashosds.py | 219 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+), 0 deletions(-)
diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py
new file mode 100644
index 00000000..253663f8
--- /dev/null
+++ b/qa/tasks/thrashosds.py
@@ -0,0 +1,219 @@
+"""
+Thrash -- Simulate random osd failures.
+"""
+import contextlib
+import logging
+from tasks import ceph_manager
+from teuthology import misc as teuthology
+
+
+log = logging.getLogger(__name__)
+
+@contextlib.contextmanager
+def task(ctx, config):
+ """
+ "Thrash" the OSDs by randomly marking them out/down (and then back
+    in) until the task is ended. This loops, and every op_delay
+    seconds it randomly chooses to mark an OSD out or bring one back
+    in (even odds), subject to always keeping at least min_in OSDs in
+    the cluster and at least min_out OSDs out of it.
+
+    All commands are run on mon0, and thrashing stops when __exit__ is called.
+
+ The config is optional, and is a dict containing some or all of:
+
+ cluster: (default 'ceph') the name of the cluster to thrash
+
+ min_in: (default 4) the minimum number of OSDs to keep in the
+ cluster
+
+ min_out: (default 0) the minimum number of OSDs to keep out of the
+ cluster
+
+ op_delay: (5) the length of time to sleep between changing an
+ OSD's status
+
+ min_dead: (0) minimum number of osds to leave down/dead.
+
+ max_dead: (0) maximum number of osds to leave down/dead before waiting
+ for clean. This should probably be num_replicas - 1.
+
+ clean_interval: (60) the approximate length of time to loop before
+ waiting until the cluster goes clean. (In reality this is used
+ to probabilistically choose when to wait, and the method used
+ makes it closer to -- but not identical to -- the half-life.)
+
+ scrub_interval: (-1) the approximate length of time to loop before
+ waiting until a scrub is performed while cleaning. (In reality
+ this is used to probabilistically choose when to wait, and it
+ only applies to the cases where cleaning is being performed).
+ -1 is used to indicate that no scrubbing will be done.
+
+ chance_down: (0.4) the probability that the thrasher will mark an
+ OSD down rather than marking it out. (The thrasher will not
+ consider that OSD out of the cluster, since presently an OSD
+ wrongly marked down will mark itself back up again.) This value
+ can be either an integer (eg, 75) or a float probability (eg
+ 0.75).
+
+ chance_test_min_size: (0) chance to run test_pool_min_size,
+ which:
+ - kills all but one osd
+ - waits
+ - kills that osd
+ - revives all other osds
+ - verifies that the osds fully recover
+
+ timeout: (360) the number of seconds to wait for the cluster
+ to become clean after each cluster change. If this doesn't
+ happen within the timeout, an exception will be raised.
+
+ revive_timeout: (150) number of seconds to wait for an osd asok to
+ appear after attempting to revive the osd
+
+ thrash_primary_affinity: (true) randomly adjust primary-affinity
+
+    chance_pgnum_grow: (0) chance to increase a pool's pg_num
+    chance_pgpnum_fix: (0) chance to set a pool's pgp_num equal to its pg_num
+    pool_grow_by: (10) amount to increase pg_num by
+    chance_pgnum_shrink: (0) chance to decrease a pool's pg_num
+    pool_shrink_by: (10) amount to decrease pg_num by
+ max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd
+
+ pause_short: (3) duration of short pause
+ pause_long: (80) duration of long pause
+ pause_check_after: (50) assert osd down after this long
+ chance_inject_pause_short: (1) chance of injecting short stall
+ chance_inject_pause_long: (0) chance of injecting long stall
+
+ clean_wait: (0) duration to wait before resuming thrashing once clean
+
+ sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a
+ random live osd
+
+ powercycle: (false) whether to power cycle the node instead
+ of just the osd process. Note that this assumes that a single
+ osd is the only important process on the node.
+
+ bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash.
+ the delay lets the BlockDevice "accept" more aio operations but blocks
+ any flush, and then eventually crashes (losing some or all ios). If 0,
+ no bdev failure injection is enabled.
+
+ bdev_inject_crash_probability: (.5) probability of doing a bdev failure
+ injection crash vs a normal OSD kill.
+
+ chance_test_backfill_full: (0) chance to simulate full disks stopping
+ backfill
+
+ chance_test_map_discontinuity: (0) chance to test map discontinuity
+ map_discontinuity_sleep_time: (40) time to wait for map trims
+
+ ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down
+ chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%)
+
+    optrack_toggle_delay: (2.0) duration to delay between toggling op tracker
+      enablement on all osds
+
+ dump_ops_enable: (true) continuously dump ops on all live osds
+
+ noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub
+
+ disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based
+ tests
+
+    chance_thrash_cluster_full: (.05) chance of thrashing the cluster
+      full ratio
+
+    chance_thrash_pg_upmap: (1.0) chance of thrashing pg-upmap entries
+    chance_thrash_pg_upmap_items: (1.0) chance of thrashing
+      pg-upmap-items entries
+
+    aggressive_pg_num_changes: (true) whether we should bypass the careful
+      throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller
+
+ example:
+
+ tasks:
+ - ceph:
+ - thrashosds:
+ cluster: ceph
+ chance_down: 10
+ op_delay: 3
+ min_in: 1
+ timeout: 600
+ - interactive:
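+
+    The same options can also be supplied through the standard teuthology
+    overrides mechanism; anything under overrides/thrashosds in the job
+    yaml is deep-merged on top of the per-task config shown above. A
+    sketch, with purely illustrative values:
+
+    overrides:
+      thrashosds:
+        chance_down: 0.75
+        op_delay: 5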
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'thrashosds task only accepts a dict for configuration'
+ # add default value for sighup_delay
+ config['sighup_delay'] = config.get('sighup_delay', 0.1)
+ # add default value for optrack_toggle_delay
+ config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0)
+ # add default value for dump_ops_enable
+ config['dump_ops_enable'] = config.get('dump_ops_enable', "true")
+ # add default value for noscrub_toggle_delay
+ config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0)
+ # add default value for random_eio
+ config['random_eio'] = config.get('random_eio', 0.0)
+
+    log.info("config is {config}".format(config=str(config)))
+
+    overrides = ctx.config.get('overrides', {})
+    log.info("overrides is {overrides}".format(overrides=str(overrides)))
+    teuthology.deep_merge(config, overrides.get('thrashosds', {}))
+    cluster = config.get('cluster', 'ceph')
+    # read this only after overrides have been merged, so an override can
+    # disable aggressive pg_num changes
+    aggro = config.get('aggressive_pg_num_changes', True)
+
+    log.info("merged config is {config}".format(config=str(config)))
+
+ if 'powercycle' in config:
+
+ # sync everyone first to avoid collateral damage to / etc.
+ log.info('Doing preliminary sync to avoid collateral damage...')
+ ctx.cluster.run(args=['sync'])
+
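+        # console access relies on the ipmi settings in the teuthology config
+        # (typically ~/.teuthology.yaml); key names such as ipmi_user,
+        # ipmi_password and ipmi_domain are the usual ones, but the exact set
+        # depends on the lab setup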
+ if 'ipmi_user' in ctx.teuthology_config:
+ for remote in ctx.cluster.remotes.keys():
+ log.debug('checking console status of %s' % remote.shortname)
+ if not remote.console.check_status():
+ log.warning('Failed to get console status for %s',
+ remote.shortname)
+
+ # check that all osd remotes have a valid console
+ osds = ctx.cluster.only(teuthology.is_type('osd', cluster))
+ for remote in osds.remotes.keys():
+ if not remote.console.has_ipmi_credentials:
+ raise Exception(
+ 'IPMI console required for powercycling, '
+ 'but not available on osd role: {r}'.format(
+ r=remote.name))
+
+ cluster_manager = ctx.managers[cluster]
+ for f in ['powercycle', 'bdev_inject_crash']:
+ if config.get(f):
+ cluster_manager.config[f] = config.get(f)
+
+ if aggro:
+ cluster_manager.raw_cluster_cmd(
+ 'config', 'set', 'mgr',
+ 'mgr_debug_aggressive_pg_num_changes',
+ 'true')
+
+ log.info('Beginning thrashosds...')
+ thrash_proc = ceph_manager.Thrasher(
+ cluster_manager,
+ config,
+ logger=log.getChild('thrasher')
+ )
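+    # while this context manager is yielded, the rest of the job runs with
+    # the thrasher active in the background; the finally block stops it and
+    # waits for the cluster to recover before teardown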
+ try:
+ yield
+ finally:
+ log.info('joining thrashosds')
+ thrash_proc.do_join()
+ cluster_manager.wait_for_all_osds_up()
+ cluster_manager.flush_all_pg_stats()
+ cluster_manager.wait_for_recovery(config.get('timeout', 360))
+ if aggro:
+ cluster_manager.raw_cluster_cmd(
+ 'config', 'rm', 'mgr',
+ 'mgr_debug_aggressive_pg_num_changes')