Diffstat (limited to 'qa/tasks/ceph_manager.py')
-rw-r--r-- | qa/tasks/ceph_manager.py | 38 |
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 516c409e8..e24965026 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -234,6 +234,7 @@ class OSDThrasher(Thrasher):
         self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0)
         self.random_eio = self.config.get('random_eio')
         self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3)
+        self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3)
 
         num_osds = self.in_osds + self.out_osds
         self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds)
@@ -779,6 +780,19 @@ class OSDThrasher(Thrasher):
         else:
             self.cancel_force_recovery()
 
+    def reset_purged_snaps_last(self):
+        """
+        Run reset_purged_snaps_last
+        """
+        self.log('reset_purged_snaps_last')
+        for osd in self.in_osds:
+            try:
+                self.ceph_manager.raw_cluster_cmd(
+                    'tell', "osd.%s" % (str(osd)),
+                    'reset_purged_snaps_last')
+            except CommandFailedError:
+                self.log('Failed to reset_purged_snaps_last, ignoring')
+
     def all_up(self):
         """
         Make sure all osds are up and not out.
@@ -1229,6 +1243,8 @@ class OSDThrasher(Thrasher):
             actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,))
         if self.chance_force_recovery > 0:
             actions.append((self.force_cancel_recovery, self.chance_force_recovery))
+        if self.chance_reset_purged_snaps_last > 0:
+            actions.append((self.reset_purged_snaps_last, self.chance_reset_purged_snaps_last))
 
         for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
             for scenario in [
@@ -1524,11 +1540,9 @@ class CephManager:
         self.cephadm = cephadm
         self.testdir = teuthology.get_testdir(self.ctx)
         # prefix args for ceph cmds to be executed
-        pre = ['adjust-ulimits', 'ceph-coverage',
-               f'{self.testdir}/archive/coverage']
-        self.CEPH_CMD = ['sudo'] + pre + ['timeout', '120', 'ceph',
-                                          '--cluster', self.cluster]
-        self.RADOS_CMD = pre + ['rados', '--cluster', self.cluster]
+        self.pre = ['adjust-ulimits', 'ceph-coverage',
+                    f'{self.testdir}/archive/coverage']
+        self.RADOS_CMD = self.pre + ['rados', '--cluster', self.cluster]
         self.run_ceph_w_prefix = ['sudo', 'daemon-helper', 'kill', 'ceph',
                                   '--cluster', self.cluster]
 
@@ -1541,6 +1555,11 @@ class CephManager:
             except CommandFailedError:
                 self.log('Failed to get pg_num from pool %s, ignoring' % pool)
 
+    def get_ceph_cmd(self, **kwargs):
+        timeout = kwargs.pop('timeout', 120)
+        return ['sudo'] + self.pre + ['timeout', f'{timeout}', 'ceph',
+                                      '--cluster', self.cluster]
+
     def ceph(self, cmd, **kwargs):
         """
         Simple Ceph admin command wrapper around run_cluster_cmd.
@@ -1584,7 +1603,7 @@ class CephManager:
                 stdout=StringIO(),
                 check_status=kwargs.get('check_status', True))
         else:
-            kwargs['args'] = prefixcmd + self.CEPH_CMD + kwargs['args']
+            kwargs['args'] = prefixcmd + self.get_ceph_cmd(**kwargs) + kwargs['args']
             return self.controller.run(**kwargs)
 
     def raw_cluster_cmd(self, *args, **kwargs) -> str:
@@ -3152,11 +3171,14 @@ class CephManager:
                 raise
         self.log("quorum is size %d" % size)
 
-    def get_mon_health(self, debug=False):
+    def get_mon_health(self, debug=False, detail=False):
         """
         Extract all the monitor health information.
         """
-        out = self.raw_cluster_cmd('health', '--format=json')
+        if detail:
+            out = self.raw_cluster_cmd('health', 'detail', '--format=json')
+        else:
+            out = self.raw_cluster_cmd('health', '--format=json')
         if debug:
             self.log('health:\n{h}'.format(h=out))
         return json.loads(out)
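
The OSDThrasher hunks add a new randomized action: with probability chance_reset_purged_snaps_last (default 0.3), the thrasher tells every in-cluster OSD to reset_purged_snaps_last, ignoring per-OSD command failures. A minimal sketch of the CLI-equivalent loop; the OSD ids are made up, and the real code goes through ceph_manager.raw_cluster_cmd rather than printing:

    # Sketch only: CLI equivalent of the new thrasher action.
    in_osds = [0, 2, 5]  # hypothetical set of "in" OSDs
    for osd in in_osds:
        cmd = ['ceph', 'tell', 'osd.%s' % osd, 'reset_purged_snaps_last']
        print(' '.join(cmd))  # e.g. "ceph tell osd.0 reset_purged_snaps_last"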
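
The CephManager hunks replace the fixed CEPH_CMD list, which hard-wired `timeout 120`, with a get_ceph_cmd() helper, so callers such as run_cluster_cmd can pick the timeout(1) value per invocation. A standalone sketch of that behavior, with illustrative testdir and cluster values:

    # Sketch, not the real CephManager: get_ceph_cmd() builds the command
    # prefix with a per-call timeout that defaults to 120 seconds.
    class Manager:
        def __init__(self):
            self.testdir = '/home/ubuntu/cephtest'  # illustrative value
            self.cluster = 'ceph'
            self.pre = ['adjust-ulimits', 'ceph-coverage',
                        f'{self.testdir}/archive/coverage']

        def get_ceph_cmd(self, **kwargs):
            timeout = kwargs.pop('timeout', 120)
            return ['sudo'] + self.pre + ['timeout', f'{timeout}', 'ceph',
                                          '--cluster', self.cluster]

    m = Manager()
    m.get_ceph_cmd()             # ... 'timeout', '120', 'ceph' ...
    m.get_ceph_cmd(timeout=300)  # ... 'timeout', '300', 'ceph' ...

Because get_ceph_cmd(**kwargs) receives its own copy of the keyword arguments, popping 'timeout' there does not remove it from the caller's kwargs.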
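
Finally, get_mon_health() gains a detail flag that switches the underlying command from `ceph health --format=json` to `ceph health detail --format=json`; the parsed JSON is returned either way. A sketch of consuming the result, using a made-up payload in the shape `ceph health` emits:

    import json

    # Made-up sample shaped like `ceph health detail --format=json` output.
    sample = ('{"status": "HEALTH_WARN",'
              ' "checks": {"OSD_DOWN": {"severity": "HEALTH_WARN"}}}')
    health = json.loads(sample)  # what get_mon_health() hands back
    for name, check in health.get('checks', {}).items():
        print(name, check.get('severity'))  # OSD_DOWN HEALTH_WARN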