Diffstat (limited to 'qa'): 176 files changed, 4752 insertions, 1168 deletions
diff --git a/qa/cephfs/begin/3-modules.yaml b/qa/cephfs/begin/3-modules.yaml new file mode 100644 index 000000000..259473425 --- /dev/null +++ b/qa/cephfs/begin/3-modules.yaml @@ -0,0 +1,19 @@ +# Enable mgr modules now before any CephFS mounts are created by the mgr. This +# avoids the potential race of the mgr mounting CephFS and then getting failed +# over by the monitors before the monitors have a chance to note the new client +# session from the mgr beacon. In that case, the monitors will not blocklist +# that client mount automatically so the MDS will eventually do the eviction +# (and create a cluster log warning which we want to avoid). +# +# Note: ideally the mgr would gently stop mgr modules before respawning so that +# the client mounts can be unmounted but this caused issues historically with +# modules like the dashboard so an abrupt restart was chosen instead. + +mgrmodules: + sequential: + - print: "Enabling mgr modules" + # other fragments append to this + +tasks: + - sequential: + - mgrmodules diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml index d8b819288..90811d6f2 100644 --- a/qa/cephfs/overrides/ignorelist_health.yaml +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -1,13 +1,15 @@ overrides: ceph: log-ignorelist: + - FS_DEGRADED + - FS_INLINE_DATA_DEPRECATED + - FS_WITH_FAILED_MDS + - MDS_ALL_DOWN + - MDS_DAMAGE + - MDS_DEGRADED + - MDS_FAILED + - MDS_INSUFFICIENT_STANDBY + - MDS_UP_LESS_THAN_MAX + - POOL_APP_NOT_ENABLED - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - \(POOL_APP_NOT_ENABLED\) + - Replacing daemon diff --git a/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml new file mode 100644 index 000000000..120b2bf04 --- /dev/null +++ b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml @@ -0,0 +1,2 @@ +overrides: + subvolume_version: 1 diff --git a/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml new file mode 100644 index 000000000..c8bcf95c0 --- /dev/null +++ b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml @@ -0,0 +1,2 @@ +overrides: + subvolume_version: 2 diff --git a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml index 7f9bc8906..e1d5b9b33 100644 --- a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml +++ b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml @@ -1,7 +1,11 @@ tasks: - cephadm.shell: host.a: + - ceph fs fail foo + - ceph fs set foo refuse_client_session true - ceph fs volume rename foo bar --yes-i-really-mean-it + - ceph fs set bar joinable true + - ceph fs set bar refuse_client_session false - fs.ready: timeout: 300 - cephadm.shell: diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml index b4f673e39..a005f5203 100644 --- a/qa/suites/fs/full/tasks/mgr-osd-full.yaml +++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml @@ -12,7 +12,7 @@ overrides: debug mds: 20 osd: # force bluestore since it's required for ec overwrites osd objectstore: bluestore - bluestore block size: 1073741824 + bluestore block size: 2147483648 tasks: - workunit: cleanup: true diff --git a/qa/suites/fs/functional/subvol_versions/.qa 
b/qa/suites/fs/functional/subvol_versions/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/.qa @@ -0,0 +1 @@ +../.qa
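
The 3-modules.yaml fragment above leaves mgrmodules.sequential as a one-entry list that later fragments extend, which only works because teuthology deep-merges the selected YAML fragments into a single job config. A minimal Python sketch of that merge behavior, assuming the simplified rule that dicts merge recursively and lists concatenate (the real logic is teuthology's deep_merge and may differ in detail):

    # Sketch of fragment merging: 3-modules.yaml combined with the
    # mgrmodules block from tasks/3-snaps/yes.yaml further down.
    import yaml

    def deep_merge(a, b):
        """Recursively merge fragment b into fragment a (simplified)."""
        if isinstance(a, dict) and isinstance(b, dict):
            out = dict(a)
            for k, v in b.items():
                out[k] = deep_merge(out[k], v) if k in out else v
            return out
        if isinstance(a, list) and isinstance(b, list):
            return a + b  # later fragments append, e.g. to mgrmodules.sequential
        return b          # scalar: the later fragment wins

    base = yaml.safe_load("""
    mgrmodules:
      sequential:
      - print: "Enabling mgr modules"
    tasks:
    - sequential:
      - mgrmodules
    """)
    snaps = yaml.safe_load("""
    mgrmodules:
      sequential:
      - exec:
          mon.a:
          - ceph mgr module enable snap_schedule
    """)
    merged = deep_merge(base, snaps)
    print(yaml.safe_dump(merged))  # the exec lands inside mgrmodules.sequential
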
\ No newline at end of file diff --git a/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml new file mode 120000 index 000000000..09cfdb59e --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml
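
The renamevolume fragment earlier in this diff brackets `ceph fs volume rename` with fail/refuse-client steps so no client or MDS races the rename. A hedged driver sketch of the same CLI sequence; the commands are taken verbatim from the fragment, while the wrapper itself is illustrative:

    # Illustrative driver for the rename sequence in 1-rename.yaml: fail
    # the fs and refuse client sessions before renaming, then make the
    # renamed fs joinable and accepting again.
    import subprocess

    def run(*cmd):
        print('+', ' '.join(cmd))
        subprocess.check_call(cmd)

    def rename_volume(old, new):
        run('ceph', 'fs', 'fail', old)
        run('ceph', 'fs', 'set', old, 'refuse_client_session', 'true')
        run('ceph', 'fs', 'volume', 'rename', old, new, '--yes-i-really-mean-it')
        run('ceph', 'fs', 'set', new, 'joinable', 'true')
        run('ceph', 'fs', 'set', new, 'refuse_client_session', 'false')

    if __name__ == '__main__':
        rename_volume('foo', 'bar')
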
\ No newline at end of file diff --git a/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml new file mode 120000 index 000000000..5a4de14e7 --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/client-recovery.yaml b/qa/suites/fs/functional/tasks/client-recovery.yaml index e67acc3ab..7ea93a367 100644 --- a/qa/suites/fs/functional/tasks/client-recovery.yaml +++ b/qa/suites/fs/functional/tasks/client-recovery.yaml @@ -9,6 +9,9 @@ overrides: - MDS_CLIENT_LATE_RELEASE - t responding to mclientcaps - file system flag refuse_client_session is set + - Degraded data redundancy + - MDS_CLIENTS_LAGGY + - Reduced data availability tasks: - cephfs_test_runner: fail_on_skip: false diff --git a/qa/suites/fs/functional/tasks/snap-schedule.yaml b/qa/suites/fs/functional/tasks/snap-schedule.yaml index f2e62b050..26922abed 100644 --- a/qa/suites/fs/functional/tasks/snap-schedule.yaml +++ b/qa/suites/fs/functional/tasks/snap-schedule.yaml @@ -6,7 +6,7 @@ overrides: debug ms: 1 debug finisher: 20 debug client: 20 - log-whitelist: + log-ignorelist: - OSD full dropping all updates - OSD near full - pausewr flag diff --git a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml index 7bbcf000f..2a175dbf1 100644 --- a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml +++ b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml @@ -6,7 +6,7 @@ overrides: debug ms: 1 debug finisher: 20 debug client: 20 - log-whitelist: + log-ignorelist: - OSD full dropping all updates - OSD near full - pausewr flag diff --git a/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
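
Several fragments above consolidate `log-whitelist` into `log-ignorelist` and replace escaped patterns like \(FS_DEGRADED\) with bare health codes. The entries behave as regular expressions searched against cluster log lines; a sketch of that filtering, under the assumption that a simple re.search per entry models teuthology's matching closely enough for illustration:

    # Sketch of log-ignorelist semantics: any line matching an entry is
    # excused, anything else flagged as an error fails the run.
    import re

    ignorelist = [
        'FS_DEGRADED',
        'MDS_ALL_DOWN',
        'overall HEALTH_',
        'Replacing daemon',
    ]

    def is_ignored(line):
        return any(re.search(pat, line) for pat in ignorelist)

    log = [
        'cluster [WRN] Health check failed: 1 filesystem is degraded (FS_DEGRADED)',
        'cluster [ERR] Health check failed: 1 osds down (OSD_DOWN)',
    ]
    for line in log:
        print('ignored' if is_ignored(line) else 'WOULD FAIL THE RUN', '|', line)
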
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/mirror/overrides/ignorelist_health.yaml b/qa/suites/fs/mirror/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/mirror/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/mirror/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/mirror/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml index 8bfe4dc6f..5cb891a95 100644..120000 --- a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml +++ b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml @@ -1,13 +1 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - \(OSD_DOWN\) +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml new file mode 100644 index 000000000..713adb962 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + log-ignorelist: + - OSD_DOWN diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml new file mode 100644 index 000000000..4a21021c0 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + setup ceph/quincy + +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + roleless: true + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/$ index e69de29bb..e69de29bb 100644 --- a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/$ diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml new file mode 100644 index 000000000..c53e8b55d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/reef + +tasks: +- install: + branch: reef + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:reef + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing reef cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml new file mode 100644 index 000000000..98bb210d1 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/v18.2.0 + +tasks: +- install: + tag: v18.2.0 + exclude_packages: + - ceph-volume +- print: "**** done install task..." 
+- cephadm: + image: quay.io/ceph/ceph:v18.2.0 + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing v18.2.0 cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml new file mode 100644 index 000000000..ce45d9ea9 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/v18.2.1 + +tasks: +- install: + tag: v18.2.1 + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.io/ceph/ceph:v18.2.1 + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing v18.2.1 cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml new file mode 100644 index 000000000..5318fd1a9 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml @@ -0,0 +1,3 @@ +tasks: +- ceph-fuse: +- print: "**** done client" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/kclient.yaml index 92b9dda84..92b9dda84 100644 --- a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/kclient.yaml diff --git a/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
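
The 0-from fixtures above only bootstrap the starting release; later steps in the suite upgrade it and verify daemon state with `ceph orch` checks like those shown. For context, a hypothetical poll that waits for every daemon to report a target version, assuming `ceph orch ps --format json` exposes a version field (field names should be verified against the release under test):

    # Hypothetical poll loop: wait until all daemons report target_version.
    import json, subprocess, time

    def daemons_on_version(target):
        out = subprocess.check_output(['ceph', 'orch', 'ps', '--format', 'json'])
        return all(d.get('version') == target for d in json.loads(out))

    def wait_for_upgrade(target, timeout=1800, interval=30):
        deadline = time.time() + timeout
        while time.time() < deadline:
            if daemons_on_version(target):
                return
            time.sleep(interval)
        raise TimeoutError(f'daemons never converged on {target}')
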
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/workload/begin/3-modules.yaml b/qa/suites/fs/workload/begin/3-modules.yaml new file mode 120000 index 000000000..1eba706a5 --- /dev/null +++ b/qa/suites/fs/workload/begin/3-modules.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-modules.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/1.yaml b/qa/suites/fs/workload/ranks/1.yaml index e69de29bb..f9e95daa9 100644 --- a/qa/suites/fs/workload/ranks/1.yaml +++ b/qa/suites/fs/workload/ranks/1.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml b/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml new file mode 100644 index 000000000..020eaa4bf --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate true diff --git a/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled b/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled new file mode 100644 index 000000000..be06d5186 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled @@ -0,0 +1,6 @@ +# distributed pins would be interesting if we had workloads on multiple clients. We do not yet. So it's disabled. +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate false + - ceph fs subvolumegroup pin cephfs qa distributed 1 diff --git a/qa/suites/fs/workload/ranks/multi/balancer/random.yaml b/qa/suites/fs/workload/ranks/multi/balancer/random.yaml new file mode 100644 index 000000000..977e83fc2 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/random.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mds: + mds_export_ephemeral_random_max: 0.10 +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate false + - ceph fs subvolumegroup pin cephfs qa random 0.10 diff --git a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml index 598f7e215..69f53768d 100644 --- a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml +++ b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml @@ -1,3 +1,10 @@ +mgrmodules: + sequential: + - exec: + mon.a: + - ceph mgr module enable snap_schedule + - ceph config set mgr mgr/snap_schedule/allow_m_granularity true + - ceph config set mgr mgr/snap_schedule/dump_on_update true overrides: ceph: conf: @@ -12,11 +19,8 @@ overrides: tasks: - exec: mon.a: - - ceph mgr module enable snap_schedule - - ceph config set mgr mgr/snap_schedule/allow_m_granularity true - - ceph config set mgr mgr/snap_schedule/dump_on_update true - - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1M - - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6M3h + - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1m + - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6m3h - ceph fs snap-schedule status --fs=cephfs --path=/ - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true - date +%s > START_TIME diff --git a/qa/suites/rbd/nbd/% b/qa/suites/krbd/mirror/% index e69de29bb..e69de29bb 100644 --- a/qa/suites/rbd/nbd/% +++ b/qa/suites/krbd/mirror/% diff --git a/qa/suites/rbd/nbd/.qa b/qa/suites/krbd/mirror/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/.qa +++ b/qa/suites/krbd/mirror/.qa diff --git a/qa/suites/krbd/mirror/bluestore-bitmap.yaml b/qa/suites/krbd/mirror/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/mirror/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
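
The 3-snaps change above moves the mgr-module setup into the shared mgrmodules list and lowercases the schedule and retention specs (1M becomes 1m, 6M3h becomes 6m3h). The specs are <count><unit> tokens; a parser sketch, with the unit meanings (m as minutes, h as hours) treated as an assumption to be checked against the snap_schedule mgr module:

    # Parse snap-schedule style specs like "1m" or "6m3h" into (count, unit)
    # pairs. Unit semantics are assumed for illustration only.
    import re

    SPEC = re.compile(r'(\d+)([a-zA-Z])')

    def parse_spec(spec):
        pairs = SPEC.findall(spec)
        if ''.join(f'{n}{u}' for n, u in pairs) != spec:
            raise ValueError(f'bad spec: {spec!r}')
        return [(int(n), u) for n, u in pairs]

    print(parse_spec('1m'))    # [(1, 'm')]             schedule: every minute (assumed)
    print(parse_spec('6m3h'))  # [(6, 'm'), (3, 'h')]   retention: keep 6 minutely + 3 hourly
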
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/.qa b/qa/suites/krbd/mirror/clusters/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/cluster/.qa +++ b/qa/suites/krbd/mirror/clusters/.qa diff --git a/qa/suites/krbd/mirror/clusters/2-node.yaml b/qa/suites/krbd/mirror/clusters/2-node.yaml new file mode 100644 index 000000000..e5036ea72 --- /dev/null +++ b/qa/suites/krbd/mirror/clusters/2-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 2 ceph clusters with 1 mon, 1 mgr and 3 osd each +roles: +- - cluster1.mon.a + - cluster1.mgr.x + - cluster1.osd.0 + - cluster1.osd.1 + - cluster1.osd.2 +- - cluster2.mon.a + - cluster2.mgr.x + - cluster2.osd.0 + - cluster2.osd.1 + - cluster2.osd.2 + - cluster1.client.mirror + - cluster1.client.mirror.0 + - cluster2.client.mirror + - cluster2.client.mirror.0 diff --git a/qa/suites/krbd/mirror/conf.yaml b/qa/suites/krbd/mirror/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/mirror/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/rbd/nbd/workloads/.qa b/qa/suites/krbd/mirror/install/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/workloads/.qa +++ b/qa/suites/krbd/mirror/install/.qa diff --git a/qa/suites/krbd/mirror/install/ceph.yaml b/qa/suites/krbd/mirror/install/ceph.yaml new file mode 100644 index 000000000..08bb1faa6 --- /dev/null +++ b/qa/suites/krbd/mirror/install/ceph.yaml @@ -0,0 +1,14 @@ +tasks: +- install: + extra_packages: + - rbd-mirror +- ceph: + cluster: cluster1 +- ceph: + cluster: cluster2 +- rbd-mirror: + client: cluster1.client.mirror.0 + thrash: False +- rbd-mirror: + client: cluster2.client.mirror.0 + thrash: False diff --git a/qa/suites/krbd/mirror/ms_mode$/.qa b/qa/suites/krbd/mirror/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/mirror/ms_mode$/crc.yaml b/qa/suites/krbd/mirror/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/mirror/ms_mode$/legacy.yaml b/qa/suites/krbd/mirror/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/mirror/ms_mode$/secure.yaml b/qa/suites/krbd/mirror/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/mirror/tasks/.qa b/qa/suites/krbd/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
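
The ms_mode$ directory above relies on teuthology's convention that a trailing $ makes the scheduler pick one fragment from that directory per job instead of crossing every fragment into the matrix. A toy model of the difference:

    # Toy model of teuthology matrix building: a normal directory multiplies
    # the job matrix by all of its fragments; a "dir$" directory contributes
    # a single randomly chosen fragment per job. Simplified for illustration.
    import itertools, random

    def build_matrix(dirs):
        crossed, pick_one = [], []
        for name, frags in dirs.items():
            (pick_one if name.endswith('$') else crossed).append(frags)
        jobs = []
        for combo in itertools.product(*crossed):
            jobs.append(list(combo) + [random.choice(f) for f in pick_one])
        return jobs

    dirs = {
        'tasks': ['compare-mirror-images.yaml',
                  'compare-mirror-image-alternate-primary.yaml'],
        'ms_mode$': ['crc.yaml', 'crc-rxbounce.yaml', 'legacy.yaml',
                     'legacy-rxbounce.yaml', 'secure.yaml'],
    }
    for job in build_matrix(dirs):
        print(job)  # 2 jobs, each with one randomly chosen ms_mode
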
\ No newline at end of file diff --git a/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml b/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml new file mode 100644 index 000000000..42ee5a274 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'krbd' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 3h diff --git a/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml b/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml new file mode 100644 index 000000000..30d147de9 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'krbd' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 3h diff --git a/qa/suites/netsplit/ceph.yaml b/qa/suites/netsplit/ceph.yaml index ddf54b3a3..7bdb78c9e 100644 --- a/qa/suites/netsplit/ceph.yaml +++ b/qa/suites/netsplit/ceph.yaml @@ -11,7 +11,7 @@ overrides: mon osdmap full prune interval: 2 mon osdmap full prune txsize: 2 # thrashing monitors may make mgr have trouble w/ its keepalive - log-whitelist: + log-ignorelist: - overall HEALTH_ - \(MGR_DOWN\) - \(MON_DOWN\) diff --git a/qa/suites/rbd/nbd/cluster/+ b/qa/suites/orch/cephadm/no-agent-workunits/% index e69de29bb..e69de29bb 100644 --- a/qa/suites/rbd/nbd/cluster/+ +++ b/qa/suites/orch/cephadm/no-agent-workunits/% diff --git a/qa/suites/orch/cephadm/no-agent-workunits/.qa b/qa/suites/orch/cephadm/no-agent-workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/no-agent-workunits/0-distro b/qa/suites/orch/cephadm/no-agent-workunits/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/no-agent-workunits/mon_election b/qa/suites/orch/cephadm/no-agent-workunits/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_adoption.yaml index e04fc1eea..e04fc1eea 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_adoption.yaml diff --git a/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml new file mode 100644 index 000000000..24b53d029 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml @@ -0,0 +1,13 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - client.0 +tasks: +- install: +- cephadm: +- workunit: + clients: + client.0: + - cephadm/test_cephadm_timeout.py
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli.yaml index ec65fb116..ec65fb116 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli.yaml diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli_mon.yaml index 2a33dc839..2a33dc839 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli_mon.yaml diff --git a/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml b/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml new file mode 100644 index 000000000..b5e0ec98f --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml @@ -0,0 +1,74 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mon.b + - mgr.b + - osd.1 +tasks: +- install: +- cephadm: +- exec: + all-hosts: + - mkdir /etc/cephadm_testing +- cephadm.apply: + specs: + - service_type: mon + placement: + host_pattern: '*' + extra_container_args: + - "--cpus=2" + extra_entrypoint_args: + - "--debug_ms 10" + - service_type: container + service_id: foo + placement: + host_pattern: '*' + spec: + image: "quay.io/fedora/fedora:latest" + entrypoint: "bash" + extra_container_args: + - "-v" + - "/etc/cephadm_testing:/root/cephadm_testing" + extra_entrypoint_args: + - "/root/write_thing_to_file.sh" + - "-c" + - "testing_custom_containers" + - "-o" + - "/root/cephadm_testing/testing.txt" + custom_configs: + - mount_path: "/root/write_thing_to_file.sh" + content: | + while getopts "o:c:" opt; do + case ${opt} in + o ) + OUT_FILE=${OPTARG} + ;; + c ) + CONTENT=${OPTARG} + esac + done + echo $CONTENT > $OUT_FILE + sleep infinity +- cephadm.wait_for_service: + service: mon +- cephadm.wait_for_service: + service: container.foo +- exec: + host.a: + - | + set -ex + FSID=$(/home/ubuntu/cephtest/cephadm shell -- ceph fsid) + sleep 60 + # check extra container and entrypoint args written to mon unit run file + grep "\-\-cpus=2" /var/lib/ceph/$FSID/mon.*/unit.run + grep "\-\-debug_ms 10" /var/lib/ceph/$FSID/mon.*/unit.run + # check that custom container properly wrote content to file. + # This requires the custom config, extra container args, and + # entrypoint args to all be working in order for this to have + # been written. 
been written. The container entrypoint was set up with custom_configs, + # the content and where to write to with the entrypoint args, and the mounting + # of the /etc/cephadm_testing dir with extra container args + grep "testing_custom_containers" /etc/cephadm_testing/testing.txt diff --git a/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml b/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml new file mode 100644 index 000000000..c195bc052 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml @@ -0,0 +1,72 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - osd.1 +- - host.b + - mon.b + - mgr.b + - osd.2 + - osd.3 +- - host.c + - mon.c + - osd.4 + - osd.5 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - | + set -ex + HOSTNAMES=$(ceph orch host ls --format json | jq -r '.[] | .hostname') + for host in $HOSTNAMES; do + # find the hostname for "host.c" which will have no mgr + HAS_MGRS=$(ceph orch ps --hostname ${host} --format json | jq 'any(.daemon_type == "mgr")') + if [ "$HAS_MGRS" == "false" ]; then + HOST_C="${host}" + fi + done + # One last thing to worry about before draining the host + # is that the teuthology test tends to put the explicit + # hostnames in the placement for the mon service. + # We want to make sure we can drain without providing + # --force and there is a check for the host being removed + # being listed explicitly in the placements. Therefore, + # we should remove it from the mon placement. + ceph orch ls mon --export > mon.yaml + sed /"$HOST_C"/d mon.yaml > mon_adjusted.yaml + ceph orch apply -i mon_adjusted.yaml + # now drain that host + ceph orch host drain $HOST_C --zap-osd-devices + # wait for drain to complete + HOST_C_DAEMONS=$(ceph orch ps --hostname $HOST_C) + while [ "$HOST_C_DAEMONS" != "No daemons reported" ]; do + sleep 15 + HOST_C_DAEMONS=$(ceph orch ps --hostname $HOST_C) + done + # we want to check the ability to remove the host from + # the CRUSH map, so we should first verify the host is in + # the CRUSH map. + ceph osd getcrushmap -o compiled-crushmap + crushtool -d compiled-crushmap -o crushmap.txt + CRUSH_MAP=$(cat crushmap.txt) + if ! grep -q "$HOST_C" <<< "$CRUSH_MAP"; then + printf "Expected to see $HOST_C in CRUSH map. Saw:\n\n$CRUSH_MAP" + exit 1 + fi + # If the drain was successful, we should be able to remove the + # host without force with no issues.
If there are still daemons + # we will get a response telling us to drain the host and a + # non-zero return code + ceph orch host rm $HOST_C --rm-crush-entry + # verify we've successfully removed the host from the CRUSH map + sleep 30 + ceph osd getcrushmap -o compiled-crushmap + crushtool -d compiled-crushmap -o crushmap.txt + CRUSH_MAP=$(cat crushmap.txt) + if grep -q "$HOST_C" <<< "$CRUSH_MAP"; then + printf "Saw $HOST_C in CRUSH map after it should have been removed.\n\n$CRUSH_MAP" + exit 1 + fi diff --git a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml index 31724f9e8..84abb702c 100644 --- a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml +++ b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml @@ -30,6 +30,7 @@ tasks: - slow request - unfound - \(POOL_APP_NOT_ENABLED\) + - enough copies available conf: osd: osd min pg log entries: 5 diff --git a/qa/suites/rados/singleton/all/mon-config.yaml b/qa/suites/rados/singleton/all/mon-config.yaml index ab1eb81b0..5e36a34a6 100644 --- a/qa/suites/rados/singleton/all/mon-config.yaml +++ b/qa/suites/rados/singleton/all/mon-config.yaml @@ -6,7 +6,7 @@ roles: - osd.0 - osd.1 - osd.2 - - client.0 + - client.rgw openstack: - volumes: # attached to each instance count: 3 @@ -18,6 +18,7 @@ tasks: - sudo ceph config set mgr mgr_pool false --force log-ignorelist: - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.rgw] - workunit: clients: all: diff --git a/qa/suites/rbd/device/% b/qa/suites/rbd/device/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/device/% diff --git a/qa/suites/rbd/device/.qa b/qa/suites/rbd/device/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/.qa @@ -0,0 +1 @@ +../.qa/
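
The host-drain workunit above loops on `ceph orch ps --hostname` until it prints "No daemons reported". The same wait, sketched with an explicit deadline so a stuck drain fails fast rather than hanging (the bash loop has no timeout); the command and sentinel string are taken from the workunit:

    # Wait for a drained host to report no daemons, with a hard deadline.
    import subprocess, time

    def wait_for_drain(host, timeout=900, interval=15):
        deadline = time.time() + timeout
        while time.time() < deadline:
            out = subprocess.check_output(
                ['ceph', 'orch', 'ps', '--hostname', host]).decode().strip()
            if out == 'No daemons reported':
                return
            time.sleep(interval)
        raise TimeoutError(f'daemons still on {host} after {timeout}s')
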
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/base b/qa/suites/rbd/device/base index fd10a859d..fd10a859d 120000 --- a/qa/suites/rbd/nbd/base +++ b/qa/suites/rbd/device/base diff --git a/qa/suites/rbd/device/cluster/+ b/qa/suites/rbd/device/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/device/cluster/+ diff --git a/qa/suites/rbd/device/cluster/.qa b/qa/suites/rbd/device/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/fixed-3.yaml b/qa/suites/rbd/device/cluster/fixed-3.yaml index 182589152..182589152 100644 --- a/qa/suites/rbd/nbd/cluster/fixed-3.yaml +++ b/qa/suites/rbd/device/cluster/fixed-3.yaml diff --git a/qa/suites/rbd/nbd/cluster/openstack.yaml b/qa/suites/rbd/device/cluster/openstack.yaml index 48becbb83..48becbb83 120000 --- a/qa/suites/rbd/nbd/cluster/openstack.yaml +++ b/qa/suites/rbd/device/cluster/openstack.yaml diff --git a/qa/suites/rbd/nbd/conf b/qa/suites/rbd/device/conf index 4bc0fe86c..4bc0fe86c 120000 --- a/qa/suites/rbd/nbd/conf +++ b/qa/suites/rbd/device/conf diff --git a/qa/suites/rbd/nbd/msgr-failures b/qa/suites/rbd/device/msgr-failures index 03689aa44..03689aa44 120000 --- a/qa/suites/rbd/nbd/msgr-failures +++ b/qa/suites/rbd/device/msgr-failures diff --git a/qa/suites/rbd/nbd/objectstore b/qa/suites/rbd/device/objectstore index c40bd3261..c40bd3261 120000 --- a/qa/suites/rbd/nbd/objectstore +++ b/qa/suites/rbd/device/objectstore diff --git a/qa/suites/rbd/nbd/supported-random-distro$ b/qa/suites/rbd/device/supported-random-distro$ index 0862b4457..0862b4457 120000 --- a/qa/suites/rbd/nbd/supported-random-distro$ +++ b/qa/suites/rbd/device/supported-random-distro$ diff --git a/qa/suites/rbd/nbd/thrashers b/qa/suites/rbd/device/thrashers index f461dadc3..f461dadc3 120000 --- a/qa/suites/rbd/nbd/thrashers +++ b/qa/suites/rbd/device/thrashers diff --git a/qa/suites/rbd/nbd/thrashosds-health.yaml b/qa/suites/rbd/device/thrashosds-health.yaml index 9124eb1aa..9124eb1aa 120000 --- a/qa/suites/rbd/nbd/thrashosds-health.yaml +++ b/qa/suites/rbd/device/thrashosds-health.yaml diff --git a/qa/suites/rbd/device/workloads/.qa b/qa/suites/rbd/device/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/workloads/.qa @@ -0,0 +1 @@ +../.qa/
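
The rbd/nbd suite is renamed to rbd/device above because its workloads now cover both krbd and rbd-nbd, selected through the RBD_DEVICE_TYPE environment variable. A sketch of the mapping step such a workload performs; the -t/--device-type flag spelling is an assumption to confirm against the rbd CLI of the release under test:

    # Map an RBD image as whichever device type the suite selected.
    import os, subprocess

    def map_image(image):
        dev_type = os.environ.get('RBD_DEVICE_TYPE', 'krbd')
        cmd = ['sudo', 'rbd', 'device', 'map', '-t', dev_type, image]
        # e.g. /dev/rbd0 for krbd, /dev/nbd0 for nbd
        return subprocess.check_output(cmd).decode().strip()

    dev = map_image('rbd/testimg')
    print('mapped at', dev)
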
\ No newline at end of file diff --git a/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml b/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml new file mode 100644 index 000000000..5907718d5 --- /dev/null +++ b/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + all: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "krbd" diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml b/qa/suites/rbd/device/workloads/diff-continuous-nbd.yaml index e0a7ebe33..e0a7ebe33 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml +++ b/qa/suites/rbd/device/workloads/diff-continuous-nbd.yaml diff --git a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml b/qa/suites/rbd/device/workloads/rbd_fsx_nbd.yaml index b5737671f..b5737671f 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml +++ b/qa/suites/rbd/device/workloads/rbd_fsx_nbd.yaml diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml b/qa/suites/rbd/device/workloads/rbd_nbd.yaml index ededea024..ededea024 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml +++ b/qa/suites/rbd/device/workloads/rbd_nbd.yaml diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml new file mode 100644 index 000000000..771400d01 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'krbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml new file mode 100644 index 000000000..e87d0e8ce --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'nbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml new file mode 100644 index 000000000..fc161987f --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'krbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml new file mode 100644 index 000000000..ed02ed257 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'nbd' + timeout: 3h diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ 
b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml new file mode 100644 index 000000000..443b89fcf --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml @@ -0,0 +1,173 @@ +meta: +- desc: | + Run ceph on two nodes, using one of them as a client, + with a separate client-only node. + Use xfs beneath the osds. + install ceph/reef v18.2.1 and the v18.2.x point versions + run workload and upgrade-sequence in parallel + (every point release should be tested) + run workload and upgrade-sequence in parallel + install ceph/reef latest version + run workload and upgrade-sequence in parallel + Overall upgrade path is - reef-latest.point-1 => reef-latest.point => reef-latest +overrides: + ceph: + log-ignorelist: + - reached quota + - scrub + - osd_map_max_advance + - wrongly marked + - FS_DEGRADED + - POOL_APP_NOT_ENABLED + - CACHE_POOL_NO_HIT_SET + - POOL_FULL + - SMALLER_PG + - pool\(s\) full + - OSD_DOWN + - missing hit_sets + - CACHE_POOL_NEAR_FULL + - PG_AVAILABILITY + - PG_DEGRADED + - application not enabled + - cache pools at or near target size + - filesystem is degraded + - OBJECT_MISPLACED + ### ref: https://tracker.ceph.com/issues/40251 + #removed see ^ - failed to encode map + + fs: xfs + + conf: + global: + mon_warn_on_pool_no_app: false + mon_mds_skip_sanity: true + mon: + mon debug unsafe allow tier with nonempty snaps: true + osd: + osd map max advance: 1000 + osd_class_default_list: "*" + osd_class_load_list: "*" + client: + rgw_crypt_require_ssl: false + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - osd.2 + - mgr.x +- - mon.b + - mon.c + - osd.3 + - osd.4 + - osd.5 + - client.0 +- - client.1 +openstack: +- volumes: # attached to each instance + count: 3 + size: 30 # GB +tasks: +- print: "**** done reef about to install v18.2.0 " +- install: + tag: v18.2.0 + # line below can be removed its from jewel test + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev', 'librgw2'] +- print: "**** done v18.2.0 install" +- ceph: + fs: xfs + add_osds_to_crush: true +- print: "**** done ceph xfs" +- sequential: + - workload +- print: "**** done workload v18.2.0" + + +####### upgrade to v18.2.1 +- install.upgrade: + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] + mon.a: + tag: v18.2.1 + mon.b: + tag: v18.2.1 +- parallel: + - workload_reef + - upgrade-sequence_reef +- print: "**** done parallel reef v18.2.1" + +#### upgrade to latest reef +- install.upgrade: + mon.a: + mon.b: +- parallel: + - workload_reef + - upgrade-sequence_reef +- print: "**** done parallel reef branch" + +####################### +workload: + sequential: + - workunit: + clients: + client.0: + - suites/blogbench.sh + +workload_reef: + full_sequential: + - workunit: + branch: reef + # tag: v18.2.1 + clients: + client.1: + - rados/test.sh + - cls + env: + CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot' + - print: "**** done rados/test.sh & cls workload_reef" + - sequential: + - rgw: [client.0] + - print: "**** done rgw workload_reef" + - rbd_fsx: + clients: [client.0] + size: 134217728 + - print: "**** done rbd_fsx workload_reef" + +upgrade-sequence_reef: + sequential: + - print: "**** done 
branch: reef install.upgrade" + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 60 + - ceph.restart: [mgr.x] + - sleep: + duration: 60 + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - print: "**** done ceph.restart all reef branch mds/osd/mon" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..bb4a6aaf3 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../../../../../distros/supported-all-distro/centos_8.yaml
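
The point-to-point fragment above runs workload_reef and upgrade-sequence_reef under a `parallel:` task, so client I/O continues while daemons restart. A toy model of parallel versus sequential task semantics, simplified from teuthology's behavior:

    # The workload and the upgrade sequence run concurrently; the job
    # proceeds only once both finish, like the parallel task.
    from concurrent.futures import ThreadPoolExecutor
    import time

    def workload_reef():
        time.sleep(2)          # stands in for rados/test.sh, rgw, rbd_fsx
        return 'workload done'

    def upgrade_sequence_reef():
        for daemon in ['mds.a', 'osd.0', 'osd.1', 'mon.a']:
            time.sleep(0.5)    # stands in for ceph.restart + sleep
            print('restarted', daemon)
        return 'upgrade done'

    with ThreadPoolExecutor() as pool:
        futures = [pool.submit(workload_reef), pool.submit(upgrade_sequence_reef)]
        for f in futures:
            print(f.result())
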
\ No newline at end of file diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml new file mode 100644 index 000000000..f20398230 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "20.04" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml new file mode 100644 index 000000000..5caffc353 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml @@ -0,0 +1,6 @@ +openstack: + - machine: + disk: 100 # GB + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml new file mode 100644 index 000000000..1271edd8b --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml @@ -0,0 +1,33 @@ +meta: +- desc: | + Run ceph on two nodes, + with a separate client-only node. + Use xfs beneath the osds. +overrides: + ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + ### ref: https://tracker.ceph.com/issues/40251 + #removed see ^ - failed to encode map + conf: + global: + enable experimental unrecoverable data corrupting features: "*" + mon: + mon warn on osd down out interval zero: false +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +- - osd.4 + - osd.5 + - osd.6 + - osd.7 +- - client.0 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml new file mode 100644 index 000000000..0c7db6ae4 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml @@ -0,0 +1,21 @@ +meta: +- desc: | + install ceph/reef v18.2.0 + Overall upgrade path is - reef-latest.point -1 => reef-latest +tasks: +- install: + tag: v18.2.0 + exclude_packages: ['librados3'] + extra_packages: ['librados2'] +- print: "**** done install reef v18.2.0" +- ceph: +- exec: + osd.0: + - ceph osd require-osd-release reef + - ceph osd set-require-min-compat-client reef +- print: "**** done ceph" +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml 
b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 000000000..02ba5c1bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,13 @@ +meta: +- desc: | + install upgrade ceph/-x on one node only + 1st half + restart : osd.0,1,2,3 +tasks: +- install.upgrade: + osd.0: +- print: "**** done install.upgrade osd.0" +- ceph.restart: + daemons: [mon.a,mon.b,mon.c,mgr.x,osd.0,osd.1,osd.2,osd.3] + mon-health-to-clog: false +- print: "**** done ceph.restart 1st half" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml new file mode 100644 index 000000000..c739d8fea --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml @@ -0,0 +1,27 @@ +meta: +- desc: | + randomly kill and revive osd + small chance to increase the number of pgs +overrides: + ceph: + log-ignorelist: + - but it is still running + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch + ### ref: https://tracker.ceph.com/issues/40251 + - failed to encode map +tasks: +- parallel: + - stress-tasks +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 +- print: "**** done thrashosds 3-thrash" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml new file mode 100644 index 000000000..fd4081f23 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml @@ -0,0 +1,8 @@ +meta: +- desc: | + run basic fsx tests for rbd +stress-tasks: +- rbd_fsx: + clients: [client.0] + size: 134217728 +- print: "**** done rbd_fsx 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml new file mode 100644 index 000000000..c545936c0 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml @@ -0,0 +1,52 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done radosbench 4-workload" diff --git 
a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml new file mode 100644 index 000000000..c0445533d --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot' +- print: "**** done cls/test_cls_rbd.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml new file mode 100644 index 000000000..a4bea35a4 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml new file mode 100644 index 000000000..025616655 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + librbd C and C++ api tests +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(POOL_FULL\) +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/test_librbd.sh +- print: "**** done rbd/test_librbd.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml new file mode 100644 index 000000000..456868998 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml new file mode 100644 index 000000000..ae232d867 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml new file mode 100644 index 000000000..803737c72 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml @@ -0,0 
+1,8 @@ +tasks: +- install.upgrade: + osd.4: + client.0: +- ceph.restart: + daemons: [osd.4, osd.5, osd.6, osd.7] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml new file mode 100644 index 000000000..78e68dbdb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + librbd python api tests +tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/test_librbd_python.sh +- print: "**** done rbd/test_librbd_python.sh 7-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml new file mode 100644 index 000000000..805bf97c3 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + 
bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml new file mode 100644 index 000000000..f20398230 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "20.04" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml new file mode 100644 index 000000000..9903fa578 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 516c409e8..e24965026 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -234,6 +234,7 @@ class OSDThrasher(Thrasher): self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) self.random_eio = self.config.get('random_eio') self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) + self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3) 
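# --- illustrative aside, not part of the patch: like the other thrash
# behaviors configured above, chance_reset_purged_snaps_last is the weight
# of an (action, weight) pair, and the thrasher draws one action at random
# each cycle. A simplified stand-in for that selection scheme (names are
# placeholders, not the actual OSDThrasher internals):
#
#     import random
#
#     def choose_action(actions):
#         # actions: e.g. [(reset_purged_snaps_last, 0.3),
#         #                (force_cancel_recovery, 0.3), ...]
#         total = sum(weight for _, weight in actions)
#         val = random.uniform(0, total)
#         for action, weight in actions:
#             if val < weight:
#                 return action
#             val -= weight
#         return None  # no action drawn this cycle
# --- end aside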
num_osds = self.in_osds + self.out_osds self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) @@ -779,6 +780,19 @@ class OSDThrasher(Thrasher): else: self.cancel_force_recovery() + def reset_purged_snaps_last(self): + """ + Run reset_purged_snaps_last + """ + self.log('reset_purged_snaps_last') + for osd in self.in_osds: + try: + self.ceph_manager.raw_cluster_cmd( + 'tell', "osd.%s" % (str(osd)), + 'reset_purged_snaps_last') + except CommandFailedError: + self.log('Failed to reset_purged_snaps_last, ignoring') + def all_up(self): """ Make sure all osds are up and not out. @@ -1229,6 +1243,8 @@ class OSDThrasher(Thrasher): actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,)) if self.chance_force_recovery > 0: actions.append((self.force_cancel_recovery, self.chance_force_recovery)) + if self.chance_reset_purged_snaps_last > 0: + actions.append((self.reset_purged_snaps_last, self.chance_reset_purged_snaps_last)) for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: for scenario in [ @@ -1524,11 +1540,9 @@ class CephManager: self.cephadm = cephadm self.testdir = teuthology.get_testdir(self.ctx) # prefix args for ceph cmds to be executed - pre = ['adjust-ulimits', 'ceph-coverage', - f'{self.testdir}/archive/coverage'] - self.CEPH_CMD = ['sudo'] + pre + ['timeout', '120', 'ceph', - '--cluster', self.cluster] - self.RADOS_CMD = pre + ['rados', '--cluster', self.cluster] + self.pre = ['adjust-ulimits', 'ceph-coverage', + f'{self.testdir}/archive/coverage'] + self.RADOS_CMD = self.pre + ['rados', '--cluster', self.cluster] self.run_ceph_w_prefix = ['sudo', 'daemon-helper', 'kill', 'ceph', '--cluster', self.cluster] @@ -1541,6 +1555,11 @@ class CephManager: except CommandFailedError: self.log('Failed to get pg_num from pool %s, ignoring' % pool) + def get_ceph_cmd(self, **kwargs): + timeout = kwargs.pop('timeout', 120) + return ['sudo'] + self.pre + ['timeout', f'{timeout}', 'ceph', + '--cluster', self.cluster] + def ceph(self, cmd, **kwargs): """ Simple Ceph admin command wrapper around run_cluster_cmd. @@ -1584,7 +1603,7 @@ class CephManager: stdout=StringIO(), check_status=kwargs.get('check_status', True)) else: - kwargs['args'] = prefixcmd + self.CEPH_CMD + kwargs['args'] + kwargs['args'] = prefixcmd + self.get_ceph_cmd(**kwargs) + kwargs['args'] return self.controller.run(**kwargs) def raw_cluster_cmd(self, *args, **kwargs) -> str: @@ -3152,11 +3171,14 @@ class CephManager: raise self.log("quorum is size %d" % size) - def get_mon_health(self, debug=False): + def get_mon_health(self, debug=False, detail=False): """ Extract all the monitor health information. 
""" - out = self.raw_cluster_cmd('health', '--format=json') + if detail: + out = self.raw_cluster_cmd('health', 'detail', '--format=json') + else: + out = self.raw_cluster_cmd('health', '--format=json') if debug: self.log('health:\n{h}'.format(h=out)) return json.loads(out) diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py index 3f8a152d7..649c0e53c 100644 --- a/qa/tasks/ceph_test_case.py +++ b/qa/tasks/ceph_test_case.py @@ -2,6 +2,7 @@ from typing import Optional, TYPE_CHECKING import unittest import time import logging +from io import StringIO from teuthology.exceptions import CommandFailedError @@ -13,7 +14,106 @@ log = logging.getLogger(__name__) class TestTimeoutError(RuntimeError): pass -class CephTestCase(unittest.TestCase): + +class RunCephCmd: + + def run_ceph_cmd(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + return self.mon_manager.run_cluster_cmd(**kwargs) + + def get_ceph_cmd_result(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + return self.run_ceph_cmd(**kwargs).exitstatus + + def get_ceph_cmd_stdout(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + kwargs['stdout'] = kwargs.pop('stdout', StringIO()) + return self.run_ceph_cmd(**kwargs).stdout.getvalue() + + def assert_retval(self, proc_retval, exp_retval): + msg = (f'expected return value: {exp_retval}\n' + f'received return value: {proc_retval}\n') + assert proc_retval == exp_retval, msg + + def _verify(self, proc, exp_retval=None, exp_errmsgs=None): + if exp_retval is None and exp_errmsgs is None: + raise RuntimeError('Method didn\'t get enough parameters. Pass ' + 'return value or error message expected from ' + 'the command/process.') + + if exp_retval is not None: + self.assert_retval(proc.returncode, exp_retval) + if exp_errmsgs is None: + return + + if isinstance(exp_errmsgs, str): + exp_errmsgs = (exp_errmsgs, ) + exp_errmsgs = tuple([e.lower() for e in exp_errmsgs]) + + proc_stderr = proc.stderr.getvalue().lower() + msg = ('didn\'t find any of the expected string in stderr.\n' + f'expected string: {exp_errmsgs}\n' + f'received error message: {proc_stderr}\n' + 'note: received error message is converted to lowercase') + for e in exp_errmsgs: + if e in proc_stderr: + break + # this else is meant for the for loop above. + else: + assert False, msg + + def negtest_ceph_cmd(self, args, retval=None, errmsgs=None, **kwargs): + """ + Conduct a negative test for the given Ceph command. + + retval and errmsgs are parameters to confirm the cause of command + failure. + + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + + NOTE: errmsgs is expected to be a tuple, but in case there's only one + error message, it can also be a string. This method will add the string + to a tuple internally. 
+ """ + kwargs['args'] = args + # execution is needed to not halt on command failure because we are + # conducting negative testing + kwargs['check_status'] = False + # stderr is needed to check for expected error messages. + kwargs['stderr'] = StringIO() + + proc = self.run_ceph_cmd(**kwargs) + self._verify(proc, retval, errmsgs) + return proc + + +class CephTestCase(unittest.TestCase, RunCephCmd): """ For test tasks that want to define a structured set of tests implemented in python. Subclass this with appropriate @@ -36,9 +136,23 @@ class CephTestCase(unittest.TestCase): # their special needs. If not met, tests will be skipped. REQUIRE_MEMSTORE = False + def _init_mon_manager(self): + # if vstart_runner.py has invoked this code + if 'Local' in str(type(self.ceph_cluster)): + from tasks.vstart_runner import LocalCephManager + self.mon_manager = LocalCephManager(ctx=self.ctx) + # else teuthology has invoked this code + else: + from tasks.ceph_manager import CephManager + self.mon_manager = CephManager(self.ceph_cluster.admin_remote, + ctx=self.ctx, logger=log.getChild('ceph_manager')) + def setUp(self): self._mon_configs_set = set() + self._init_mon_manager() + self.admin_remote = self.ceph_cluster.admin_remote + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", "Starting test {0}".format(self.id())) @@ -148,12 +262,14 @@ class CephTestCase(unittest.TestCase): return ContextManager() - def wait_for_health(self, pattern, timeout): + def wait_for_health(self, pattern, timeout, check_in_detail=None): """ Wait until 'ceph health' contains messages matching the pattern + Also check if @check_in_detail matches detailed health messages + only when @pattern is a code string. """ def seen_health_warning(): - health = self.ceph_cluster.mon_manager.get_mon_health() + health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=bool(check_in_detail)) codes = [s for s in health['checks']] summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()] if len(summary_strings) == 0: @@ -164,7 +280,16 @@ class CephTestCase(unittest.TestCase): if pattern in ss: return True if pattern in codes: - return True + if not check_in_detail: + return True + # check if the string is in detail list if asked + detail_strings = [ss['message'] for ss in \ + [s for s in health['checks'][pattern]['detail']]] + log.debug(f'detail_strings: {detail_strings}') + for ds in detail_strings: + if check_in_detail in ds: + return True + log.debug(f'detail string "{check_in_detail}" not found') log.debug("Not found expected summary strings yet ({0})".format(summary_strings)) return False diff --git a/qa/tasks/cephfs/caps_helper.py b/qa/tasks/cephfs/caps_helper.py index ac9bc4401..1ead57b71 100644 --- a/qa/tasks/cephfs/caps_helper.py +++ b/qa/tasks/cephfs/caps_helper.py @@ -160,11 +160,11 @@ class CapTester(CephFSTestCase): else: raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".') - def conduct_pos_test_for_read_caps(self): + def conduct_pos_test_for_read_caps(self, sudo_read=False): for mount, path, data in self.test_set: log.info(f'test read perm: read file {path} and expect data ' f'"{data}"') - contents = mount.read_file(path) + contents = mount.read_file(path, sudo_read) self.assertEqual(data, contents) log.info(f'read perm was tested successfully: "{data}" was ' f'successfully read from path {path}') @@ -193,3 +193,32 @@ class CapTester(CephFSTestCase): cmdargs.pop(-1) log.info('absence of write perm was tested successfully: ' f'failed to be write data to file {path}.') + + def 
_conduct_neg_test_for_root_squash_caps(self, _cmdargs, sudo_write=False): + possible_errmsgs = ('permission denied', 'operation not permitted') + cmdargs = ['sudo'] if sudo_write else [''] + cmdargs += _cmdargs + + for mount, path, data in self.test_set: + log.info(f'test absence of {_cmdargs[0]} perm: expect failure {path}.') + + # open the file and hold it. The MDS will issue CEPH_CAP_EXCL_* + # to mount + proc = mount.open_background(path) + cmdargs.append(path) + mount.negtestcmd(args=cmdargs, retval=1, errmsgs=possible_errmsgs) + cmdargs.pop(-1) + mount._kill_background(proc) + log.info(f'absence of {_cmdargs[0]} perm was tested successfully') + + def conduct_neg_test_for_chown_caps(self, sudo_write=True): + # flip ownership to nobody. assumption: nobody's id is 65534 + cmdargs = ['chown', '-h', '65534:65534'] + self._conduct_neg_test_for_root_squash_caps(cmdargs, sudo_write) + + def conduct_neg_test_for_truncate_caps(self, sudo_write=True): + cmdargs = ['truncate', '-s', '10GB'] + self._conduct_neg_test_for_root_squash_caps(cmdargs, sudo_write) + + def conduct_pos_test_for_open_caps(self, sudo_read=True): + self.conduct_pos_test_for_read_caps(sudo_read) diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index d2688929c..f26b598aa 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -3,8 +3,6 @@ import logging import os import re -from shlex import split as shlex_split - from tasks.ceph_test_case import CephTestCase from teuthology import contextutil @@ -96,22 +94,22 @@ class CephFSTestCase(CephTestCase): # In case anything is in the OSD blocklist list, clear it out. This is to avoid # the OSD map changing in the background (due to blocklist expiry) while tests run. try: - self.mds_cluster.mon_manager.run_cluster_cmd(args="osd blocklist clear") + self.run_ceph_cmd("osd blocklist clear") except CommandFailedError: # Fallback for older Ceph cluster try: - blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", - "dump", "--format=json-pretty"))['blocklist'] + blocklist = json.loads(self.get_ceph_cmd_stdout("osd", + "dump", "--format=json-pretty"))['blocklist'] log.info(f"Removing {len(blocklist)} blocklist entries") for addr, blocklisted_at in blocklist.items(): - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr) + self.run_ceph_cmd("osd", "blocklist", "rm", addr) except KeyError: # Fallback for more older Ceph clusters, who will use 'blacklist' instead. 
- blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", - "dump", "--format=json-pretty"))['blacklist'] + blacklist = json.loads(self.get_ceph_cmd_stdout("osd", + "dump", "--format=json-pretty"))['blacklist'] log.info(f"Removing {len(blacklist)} blacklist entries") for addr, blocklisted_at in blacklist.items(): - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) + self.run_ceph_cmd("osd", "blacklist", "rm", addr) def setUp(self): super(CephFSTestCase, self).setUp() @@ -160,7 +158,7 @@ class CephFSTestCase(CephTestCase): for entry in self.auth_list(): ent_type, ent_id = entry['entity'].split(".") if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'): - self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) + self.run_ceph_cmd("auth", "del", entry['entity']) if self.REQUIRE_FILESYSTEM: self.fs = self.mds_cluster.newfs(create=True) @@ -171,11 +169,11 @@ class CephFSTestCase(CephTestCase): 'osd', f'allow rw tag cephfs data={self.fs.name}', 'mds', 'allow'] - if self.run_cluster_cmd_result(cmd) == 0: + if self.get_ceph_cmd_result(*cmd) == 0: break cmd[1] = 'add' - if self.run_cluster_cmd_result(cmd) != 0: + if self.get_ceph_cmd_result(*cmd) != 0: raise RuntimeError(f'Failed to create new client {cmd[2]}') # wait for ranks to become active @@ -188,9 +186,8 @@ class CephFSTestCase(CephTestCase): if self.REQUIRE_BACKUP_FILESYSTEM: if not self.REQUIRE_FILESYSTEM: self.skipTest("backup filesystem requires a primary filesystem as well") - self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', - 'enable_multiple', 'true', - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'flag', 'set', 'enable_multiple', 'true', + '--yes-i-really-mean-it') self.backup_fs = self.mds_cluster.newfs(name="backup_fs") self.backup_fs.wait_for_daemons() @@ -226,9 +223,8 @@ class CephFSTestCase(CephTestCase): """ Convenience wrapper on "ceph auth ls" """ - return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd( - "auth", "ls", "--format=json-pretty" - ))['auth_dump'] + return json.loads(self.get_ceph_cmd_stdout("auth", "ls", + "--format=json-pretty"))['auth_dump'] def assert_session_count(self, expected, ls_data=None, mds_id=None): if ls_data is None: @@ -411,16 +407,6 @@ class CephFSTestCase(CephTestCase): except contextutil.MaxWhileTries as e: raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e - def run_cluster_cmd(self, cmd): - if isinstance(cmd, str): - cmd = shlex_split(cmd) - return self.fs.mon_manager.raw_cluster_cmd(*cmd) - - def run_cluster_cmd_result(self, cmd): - if isinstance(cmd, str): - cmd = shlex_split(cmd) - return self.fs.mon_manager.raw_cluster_cmd_result(*cmd) - def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None): if not (moncap or osdcap or mdscap): if self.fs: @@ -438,5 +424,5 @@ class CephFSTestCase(CephTestCase): if mdscap: cmd += ['mds', mdscap] - self.run_cluster_cmd(cmd) - return self.run_cluster_cmd(f'auth get {self.client_name}') + self.run_ceph_cmd(*cmd) + return self.run_ceph_cmd(f'auth get {self.client_name}') diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 777ba8249..dc314efa8 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -17,8 +17,10 @@ from teuthology import misc from teuthology.nuke import clear_firewall from teuthology.parallel import parallel from teuthology import contextutil + from tasks.ceph_manager import write_conf 
-from tasks import ceph_manager +from tasks.ceph_manager import CephManager +from tasks.ceph_test_case import RunCephCmd log = logging.getLogger(__name__) @@ -66,16 +68,16 @@ class FSMissing(Exception): def __str__(self): return f"File system {self.ident} does not exist in the map" -class FSStatus(object): +class FSStatus(RunCephCmd): """ Operations on a snapshot of the FSMap. """ def __init__(self, mon_manager, epoch=None): - self.mon = mon_manager + self.mon_manager = mon_manager cmd = ["fs", "dump", "--format=json"] if epoch is not None: cmd.append(str(epoch)) - self.map = json.loads(self.mon.raw_cluster_cmd(*cmd)) + self.map = json.loads(self.get_ceph_cmd_stdout(*cmd)) def __str__(self): return json.dumps(self.map, indent = 2, sort_keys = True) @@ -216,7 +218,7 @@ class FSStatus(object): #all matching return False -class CephCluster(object): +class CephCluster(RunCephCmd): @property def admin_remote(self): first_mon = misc.get_first_mon(self._ctx, None) @@ -225,7 +227,8 @@ class CephCluster(object): def __init__(self, ctx) -> None: self._ctx = ctx - self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) + self.mon_manager = CephManager(self.admin_remote, ctx=ctx, + logger=log.getChild('ceph_manager')) def get_config(self, key, service_type=None): """ @@ -261,8 +264,14 @@ class CephCluster(object): "-Infinity": -float("inf")} return c[value] - j = json.loads(response_data.replace('inf', 'Infinity'), - parse_constant=get_nonnumeric_values) + + j = {} + try: + j = json.loads(response_data.replace('inf', 'Infinity'), + parse_constant=get_nonnumeric_values) + except json.decoder.JSONDecodeError: + raise RuntimeError(response_data) # assume it is an error message, pass it up + pretty = json.dumps(j, sort_keys=True, indent=2) log.debug(f"_json_asok output\n{pretty}") return j @@ -271,7 +280,7 @@ class CephCluster(object): return None def is_addr_blocklisted(self, addr): - blocklist = json.loads(self.mon_manager.raw_cluster_cmd( + blocklist = json.loads(self.get_ceph_cmd_stdout( "osd", "dump", "--format=json"))['blocklist'] if addr in blocklist: return True @@ -350,7 +359,7 @@ class MDSCluster(CephCluster): Inform MDSMonitor of the death of the daemon process(es). If it held a rank, that rank will be relinquished. 
""" - self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) + self._one_or_all(mds_id, lambda id_: self.get_ceph_cmd_stdout("mds", "fail", id_)) def mds_restart(self, mds_id=None): self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) @@ -364,7 +373,7 @@ class MDSCluster(CephCluster): """ def _fail_restart(id_): self.mds_daemons[id_].stop() - self.mon_manager.raw_cluster_cmd("mds", "fail", id_) + self.run_ceph_cmd("mds", "fail", id_) self.mds_daemons[id_].restart() self._one_or_all(mds_id, _fail_restart) @@ -468,7 +477,7 @@ class MDSCluster(CephCluster): return FSStatus(self.mon_manager).get_mds(mds_id) def is_pool_full(self, pool_name): - pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + pools = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['pools'] for pool in pools: if pool['pool_name'] == pool_name: return 'full' in pool['flags_names'].split(",") @@ -575,21 +584,21 @@ class Filesystem(MDSCluster): assert(mds_map['in'] == list(range(0, mds_map['max_mds']))) def reset(self): - self.mon_manager.raw_cluster_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') + self.run_ceph_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') def fail(self): - self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name)) + self.run_ceph_cmd("fs", "fail", str(self.name)) def set_flag(self, var, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "flag", "set", var, *a) + self.run_ceph_cmd("fs", "flag", "set", var, *a) def set_allow_multifs(self, yes=True): self.set_flag("enable_multiple", yes) def set_var(self, var, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a) + self.run_ceph_cmd("fs", "set", self.name, var, *a) def set_down(self, down=True): self.set_var("down", str(down).lower()) @@ -615,9 +624,12 @@ class Filesystem(MDSCluster): def set_refuse_client_session(self, yes): self.set_var("refuse_client_session", yes) + def set_refuse_standby_for_another_fs(self, yes): + self.set_var("refuse_standby_for_another_fs", yes) + def compat(self, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "compat", self.name, *a) + self.run_ceph_cmd("fs", "compat", self.name, *a) def add_compat(self, *args): self.compat("add_compat", *args) @@ -633,7 +645,7 @@ class Filesystem(MDSCluster): def required_client_features(self, *args, **kwargs): c = ["fs", "required_client_features", self.name, *args] - return self.mon_manager.run_cluster_cmd(args=c, **kwargs) + return self.run_ceph_cmd(args=c, **kwargs) # Since v15.1.0 the pg autoscale mode has been enabled as default, # will let the pg autoscale mode to calculate the pg_num as needed. 
@@ -662,24 +674,23 @@ class Filesystem(MDSCluster): log.debug("Creating filesystem '{0}'".format(self.name)) try: - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.metadata_pool_name, - '--pg_num_min', str(self.pg_num_min)) - - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - data_pool_name, str(self.pg_num), - '--pg_num_min', str(self.pg_num_min), - '--target_size_ratio', - str(self.target_size_ratio)) + self.run_ceph_cmd('osd', 'pool', 'create', self.metadata_pool_name, + '--pg_num_min', str(self.pg_num_min)) + + self.run_ceph_cmd('osd', 'pool', 'create', data_pool_name, + str(self.pg_num), + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', + str(self.target_size_ratio)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.metadata_pool_name, - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + str(self.pg_num_min)) - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - data_pool_name, str(self.pg_num), - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + str(self.pg_num_min)) else: raise @@ -688,7 +699,7 @@ class Filesystem(MDSCluster): args.append('--recover') if metadata_overlay: args.append('--allow-dangerous-metadata-overlay') - self.mon_manager.raw_cluster_cmd(*args) + self.run_ceph_cmd(*args) if not recover: if self.ec_profile and 'disabled' not in self.ec_profile: @@ -696,23 +707,22 @@ log.debug("EC profile is %s", self.ec_profile) cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name] cmd.extend(self.ec_profile) - self.mon_manager.raw_cluster_cmd(*cmd) + self.run_ceph_cmd(*cmd) try: - self.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'osd', 'pool', 'create', ec_data_pool_name, 'erasure', ec_data_pool_name, '--pg_num_min', str(self.pg_num_min), '--target_size_ratio', str(self.target_size_ratio_ec)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'osd', 'pool', 'create', ec_data_pool_name, str(self.pg_num_min), 'erasure', ec_data_pool_name) else: raise - self.mon_manager.raw_cluster_cmd( - 'osd', 'pool', 'set', - ec_data_pool_name, 'allow_ec_overwrites', 'true') + self.run_ceph_cmd('osd', 'pool', 'set', ec_data_pool_name, + 'allow_ec_overwrites', 'true') self.add_data_pool(ec_data_pool_name, create=False) self.check_pool_application(ec_data_pool_name) @@ -723,7 +733,8 @@ class Filesystem(MDSCluster): # Turn off spurious standby count warnings from modifying max_mds in tests.
try: - self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0') + self.run_ceph_cmd('fs', 'set', self.name, 'standby_count_wanted', + '0') except CommandFailedError as e: if e.exitstatus == 22: # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise) @@ -756,17 +767,29 @@ class Filesystem(MDSCluster): assert(isinstance(subvols['create'], int)) assert(subvols['create'] > 0) + self.mon_manager.raw_cluster_cmd('fs', 'subvolumegroup', 'create', self.name, 'qa') + subvol_options = self.fs_config.get('subvol_options', '') + for sv in range(0, subvols['create']): sv_name = f'sv_{sv}' - self.mon_manager.raw_cluster_cmd( - 'fs', 'subvolume', 'create', self.name, sv_name, - self.fs_config.get('subvol_options', '')) + cmd = [ + 'fs', + 'subvolume', + 'create', + self.name, + sv_name, + '--group_name', 'qa', + ] + if subvol_options: + cmd.append(subvol_options) + self.run_ceph_cmd(cmd) if self.name not in self._ctx.created_subvols: self._ctx.created_subvols[self.name] = [] - subvol_path = self.mon_manager.raw_cluster_cmd( - 'fs', 'subvolume', 'getpath', self.name, sv_name) + subvol_path = self.get_ceph_cmd_stdout( + 'fs', 'subvolume', 'getpath', self.name, + '--group_name', 'qa', sv_name) subvol_path = subvol_path.strip() self._ctx.created_subvols[self.name].append(subvol_path) else: @@ -858,7 +881,7 @@ class Filesystem(MDSCluster): """ Whether a filesystem exists in the mon's filesystem list """ - fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty')) + fs_list = json.loads(self.get_ceph_cmd_stdout('fs', 'ls', '--format=json-pretty')) return self.name in [fs['name'] for fs in fs_list] def legacy_configured(self): @@ -867,7 +890,7 @@ class Filesystem(MDSCluster): the case, the caller should avoid using Filesystem.create """ try: - out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools') + out_text = self.get_ceph_cmd_stdout('--format=json-pretty', 'osd', 'lspools') pools = json.loads(out_text) metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] if metadata_pool_exists: @@ -883,7 +906,7 @@ class Filesystem(MDSCluster): return metadata_pool_exists def _df(self): - return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) + return json.loads(self.get_ceph_cmd_stdout("df", "--format=json-pretty")) # may raise FSMissing def get_mds_map(self, status=None): @@ -901,15 +924,15 @@ class Filesystem(MDSCluster): def add_data_pool(self, name, create=True): if create: try: - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, - '--pg_num_min', str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', name, + '--pg_num_min', str(self.pg_num_min)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', name, + str(self.pg_num_min)) else: raise - self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name) + self.run_ceph_cmd('fs', 'add_data_pool', self.name, name) self.get_pool_names(refresh = True) for poolid, fs_name in self.data_pools.items(): if name == fs_name: @@ -962,9 +985,9 @@ class Filesystem(MDSCluster): self.data_pool_name = name def get_pool_pg_num(self, pool_name): - pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', - pool_name, 'pg_num', - '--format=json-pretty')) + pgs = 
json.loads(self.get_ceph_cmd_stdout('osd', 'pool', 'get', + pool_name, 'pg_num', + '--format=json-pretty')) return int(pgs['pg_num']) def get_namespace_id(self): @@ -1095,13 +1118,13 @@ class Filesystem(MDSCluster): self.mds_signal(name, signal) def rank_freeze(self, yes, rank=0): - self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) + self.run_ceph_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) def rank_repaired(self, rank): - self.mon_manager.raw_cluster_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) + self.run_ceph_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) def rank_fail(self, rank=0): - self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank)) + self.run_ceph_cmd("mds", "fail", "{}:{}".format(self.id, rank)) def rank_is_running(self, rank=0, status=None): name = self.get_rank(rank=rank, status=status)['name'] @@ -1240,15 +1263,15 @@ class Filesystem(MDSCluster): if mds_id is None: return self.rank_tell(command) - return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{mds_id}", *command)) + return json.loads(self.get_ceph_cmd_stdout("tell", f"mds.{mds_id}", *command)) def rank_asok(self, command, rank=0, status=None, timeout=None): info = self.get_rank(rank=rank, status=status) return self.json_asok(command, 'mds', info['name'], timeout=timeout) - def rank_tell(self, command, rank=0, status=None): + def rank_tell(self, command, rank=0, status=None, timeout=120): try: - out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command) + out = self.get_ceph_cmd_stdout("tell", f"mds.{self.id}:{rank}", *command) return json.loads(out) except json.decoder.JSONDecodeError: log.error("could not decode: {}".format(out)) @@ -1648,8 +1671,8 @@ class Filesystem(MDSCluster): caps = tuple(x) client_name = 'client.' 
+ client_id - return self.mon_manager.raw_cluster_cmd('fs', 'authorize', self.name, - client_name, *caps) + return self.get_ceph_cmd_stdout('fs', 'authorize', self.name, + client_name, *caps) def grow(self, new_max_mds, status=None): oldmax = self.get_var('max_mds', status=status) @@ -1663,11 +1686,11 @@ class Filesystem(MDSCluster): self.set_max_mds(new_max_mds) return self.wait_for_daemons() - def run_scrub(self, cmd, rank=0): - return self.rank_tell(["scrub"] + cmd, rank) + def run_scrub(self, cmd, rank=0, timeout=300): + return self.rank_tell(["scrub"] + cmd, rank=rank, timeout=timeout) def get_scrub_status(self, rank=0): - return self.run_scrub(["status"], rank) + return self.run_scrub(["status"], rank=rank, timeout=300) def flush(self, rank=0): return self.rank_tell(["flush", "journal"], rank=rank) @@ -1679,7 +1702,7 @@ class Filesystem(MDSCluster): result = "no active scrubs running" with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: while proceed(): - out_json = self.rank_tell(["scrub", "status"], rank=rank) + out_json = self.rank_tell(["scrub", "status"], rank=rank, timeout=timeout) assert out_json is not None if not reverse: if result in out_json['status']: diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 89f6b6639..c59f661a3 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -260,9 +260,10 @@ class KernelMount(CephFSMount): import json def get_id_to_dir(): - result = {} + meta_dir = "{meta_dir}" + result = dict() for dir in glob.glob("/sys/kernel/debug/ceph/*"): - if os.path.basename(dir) == DEBUGFS_META_DIR: + if os.path.basename(dir) == meta_dir: continue mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() global_id = mds_sessions_lines[0].split()[1].strip('"') @@ -270,7 +271,7 @@ class KernelMount(CephFSMount): result[client_id] = global_id return result print(json.dumps(get_id_to_dir())) - """) + """.format(meta_dir=DEBUGFS_META_DIR)) output = self.client_remote.sh([ 'sudo', 'python3', '-c', pyscript @@ -342,7 +343,7 @@ echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control if self.inst is not None: return self.inst - client_gid = "client%d" % self.get_global_id() + client_gid = "client%d" % int(self.get_global_id()) self.inst = " ".join([client_gid, self._global_addr]) return self.inst diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 4a8187406..bd92cadaa 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -195,10 +195,10 @@ class CephFSMount(object): self.fs = Filesystem(self.ctx, name=self.cephfs_name) try: - output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + output = self.fs.get_ceph_cmd_stdout('osd blocklist ls') except CommandFailedError: # Fallback for older Ceph cluster - output = self.fs.mon_manager.raw_cluster_cmd(args='osd blacklist ls') + output = self.fs.get_ceph_cmd_stdout('osd blacklist ls') return self.addr in output @@ -740,15 +740,19 @@ class CephFSMount(object): if perms: self.run_shell(args=f'chmod {perms} {path}') - def read_file(self, path): + def read_file(self, path, sudo=False): """ Return the data from the file on given path. 
""" if path.find(self.hostfs_mntpt) == -1: path = os.path.join(self.hostfs_mntpt, path) - return self.run_shell(args=['cat', path]).\ - stdout.getvalue().strip() + args = [] + if sudo: + args.append('sudo') + args += ['cat', path] + + return self.run_shell(args=args, omit_sudo=False).stdout.getvalue().strip() def create_destroy(self): assert(self.is_mounted()) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index 9890381c6..4f3100bbe 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -7,6 +7,7 @@ from io import StringIO from os.path import join as os_path_join from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while from tasks.cephfs.cephfs_test_case import CephFSTestCase, classhook from tasks.cephfs.filesystem import FileLayout, FSMissing @@ -15,6 +16,58 @@ from tasks.cephfs.caps_helper import CapTester log = logging.getLogger(__name__) +class TestLabeledPerfCounters(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_per_client_labeled_perf_counters(self): + """ + That the per-client labelled perf counters depict the clients + performaing IO. + """ + def get_counters_for(filesystem, client_id): + dump = self.fs.rank_tell(["counter", "dump"]) + per_client_metrics_key = f'mds_client_metrics-{filesystem}' + counters = [c["counters"] for \ + c in dump[per_client_metrics_key] if c["labels"]["client"] == client_id] + return counters[0] + + # sleep a bit so that we get updated clients... + time.sleep(10) + + # lookout for clients... + dump = self.fs.rank_tell(["counter", "dump"]) + + fs_suffix = dump["mds_client_metrics"][0]["labels"]["fs_name"] + self.assertGreaterEqual(dump["mds_client_metrics"][0]["counters"]["num_clients"], 2) + + per_client_metrics_key = f'mds_client_metrics-{fs_suffix}' + mount_a_id = f'client.{self.mount_a.get_global_id()}' + mount_b_id = f'client.{self.mount_b.get_global_id()}' + + clients = [c["labels"]["client"] for c in dump[per_client_metrics_key]] + self.assertIn(mount_a_id, clients) + self.assertIn(mount_b_id, clients) + + # write workload + self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True) + with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed: + counters_dump_a = get_counters_for(fs_suffix, mount_a_id) + while proceed(): + if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0: + return True + + # read from the other client + for i in range(100): + self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False) + with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed: + counters_dump_b = get_counters_for(fs_suffix, mount_b_id) + while proceed(): + if counters_dump_b["total_read_ops"] > 0 and counters_dump_b["total_read_size"] > 0: + return True + + self.fs.teardown() + class TestAdminCommands(CephFSTestCase): """ Tests for administration command. 
@@ -24,18 +77,18 @@ class TestAdminCommands(CephFSTestCase): MDSS_REQUIRED = 1 def check_pool_application_metadata_key_value(self, pool, app, key, value): - output = self.fs.mon_manager.raw_cluster_cmd( + output = self.get_ceph_cmd_stdout( 'osd', 'pool', 'application', 'get', pool, app, key) self.assertEqual(str(output.strip()), value) def setup_ec_pools(self, n, metadata=True, overwrites=True): if metadata: - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8") + self.run_ceph_cmd('osd', 'pool', 'create', n+"-meta", "8") cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"] - self.fs.mon_manager.raw_cluster_cmd(*cmd) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") + self.run_ceph_cmd(cmd) + self.run_ceph_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") if overwrites: - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') + self.run_ceph_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') @classhook('_add_valid_tell') class TestValidTell(TestAdminCommands): @@ -76,13 +129,13 @@ class TestFsStatus(TestAdminCommands): That `ceph fs status` command functions. """ - s = self.fs.mon_manager.raw_cluster_cmd("fs", "status") + s = self.get_ceph_cmd_stdout("fs", "status") self.assertTrue("active" in s) - mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"] + mdsmap = json.loads(self.get_ceph_cmd_stdout("fs", "status", "--format=json-pretty"))["mdsmap"] self.assertEqual(mdsmap[0]["state"], "active") - mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"] + mdsmap = json.loads(self.get_ceph_cmd_stdout("fs", "status", "--format=json"))["mdsmap"] self.assertEqual(mdsmap[0]["state"], "active") @@ -104,7 +157,7 @@ class TestAddDataPool(TestAdminCommands): That the application metadata set on a newly added data pool is as expected. 
""" pool_name = "foo" - mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd = self.get_ceph_cmd_stdout mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min', str(self.fs.pg_num_min)) # Check whether https://tracker.ceph.com/issues/43061 is fixed @@ -148,22 +201,22 @@ class TestAddDataPool(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) # try to add 'first_data_pool' with 'second_fs' # Expecting EINVAL exit status because 'first_data_pool' is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', second_fs, first_data_pool) + self.run_ceph_cmd('fs', 'add_data_pool', second_fs, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -178,23 +231,23 @@ class TestAddDataPool(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) # try to add 'second_metadata_pool' with 'first_fs' as a data pool # Expecting EINVAL exit status because 'second_metadata_pool' # is already in use with 'second_fs' as a metadata pool try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', first_fs, second_metadata_pool) + self.run_ceph_cmd('fs', 'add_data_pool', 
first_fs, second_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -211,23 +264,21 @@ class TestFsNew(TestAdminCommands): metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool' badname = n+'badname@#' - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - n+metapoolname) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - n+datapoolname) + self.run_ceph_cmd('osd', 'pool', 'create', n+metapoolname) + self.run_ceph_cmd('osd', 'pool', 'create', n+datapoolname) # test that fsname not with "goodchars" fails args = ['fs', 'new', badname, metapoolname, datapoolname] - proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(), - check_status=False) + proc = self.run_ceph_cmd(args=args, stderr=StringIO(), + check_status=False) self.assertIn('invalid chars', proc.stderr.getvalue().lower()) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname, - metapoolname, - '--yes-i-really-really-mean-it-not-faking') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname, - datapoolname, - '--yes-i-really-really-mean-it-not-faking') + self.run_ceph_cmd('osd', 'pool', 'rm', metapoolname, + metapoolname, + '--yes-i-really-really-mean-it-not-faking') + self.run_ceph_cmd('osd', 'pool', 'rm', datapoolname, + datapoolname, + '--yes-i-really-really-mean-it-not-faking') def test_new_default_ec(self): """ @@ -239,7 +290,7 @@ class TestFsNew(TestAdminCommands): n = "test_new_default_ec" self.setup_ec_pools(n) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -257,7 +308,7 @@ class TestFsNew(TestAdminCommands): self.mds_cluster.delete_all_filesystems() n = "test_new_default_ec_force" self.setup_ec_pools(n) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") def test_new_default_ec_no_overwrite(self): """ @@ -269,7 +320,7 @@ class TestFsNew(TestAdminCommands): n = "test_new_default_ec_no_overwrite" self.setup_ec_pools(n, overwrites=False) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -279,7 +330,7 @@ class TestFsNew(TestAdminCommands): raise RuntimeError("expected failure") # and even with --force ! 
try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -297,7 +348,7 @@ class TestFsNew(TestAdminCommands): fs_name = "test_fs_new_pool_application" keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] - mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd = self.get_ceph_cmd_stdout for p in pool_names: mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min)) mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs') @@ -315,8 +366,8 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') self.fs.status().get_fsmap(fscid) for i in range(2): self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) @@ -330,9 +381,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') self.fs.status().get_fsmap(fscid) def test_fs_new_with_specific_id_fails_without_force_flag(self): @@ -344,9 +395,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'osd pool create {p}') try: - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a file system with specifc ID without --force flag") @@ -363,9 +414,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'osd pool create {p}') try: - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a file system with specifc ID that is already in use") @@ -381,13 +432,13 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, 
first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) # try to create new fs 'second_fs' with following configuration # metadata pool -> 'first_metadata_pool' @@ -395,7 +446,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -410,13 +461,13 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) # try to create new fs 'second_fs' with following configuration # metadata pool -> 'second_metadata_pool' @@ -424,7 +475,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -439,9 +490,9 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" @@ -451,7 +502,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' and 'first_data_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -466,17 +517,17 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" 
first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) third_fs = "third_fs" @@ -486,7 +537,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' and 'second_data_pool' # is already in use with 'first_fs' and 'second_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) + self.run_ceph_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -501,9 +552,9 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" @@ -513,7 +564,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' and 'first_metadata_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -528,17 +579,17 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = 
"second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) third_fs = "third_fs" @@ -548,7 +599,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' and 'second_metadata_pool' # is already in use with 'first_fs' and 'second_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) + self.run_ceph_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -561,20 +612,20 @@ class TestFsNew(TestAdminCommands): # create pool and initialise with rbd new_pool = "new_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_pool) self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) new_fs = "new_fs" new_data_pool = "new_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_data_pool) # try to create new fs 'new_fs' with following configuration # metadata pool -> 'new_pool' (already used by rbd app) # data pool -> 'new_data_pool' # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_pool, new_data_pool) + self.run_ceph_cmd('fs', 'new', new_fs, new_pool, new_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -587,20 +638,20 @@ class TestFsNew(TestAdminCommands): # create pool and initialise with rbd new_pool = "new_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_pool) self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) new_fs = "new_fs" new_metadata_pool = "new_metadata_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_metadata_pool) # try to create new fs 'new_fs' with following configuration # metadata pool -> 'new_metadata_pool' # data pool -> 'new_pool' (already used by rbd app) # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) + self.run_ceph_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -628,7 +679,7 @@ class TestRenameCommand(TestAdminCommands): new_fs_name = 'new_cephfs' client_id = 'test_new_cephfs' - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') # authorize a cephx ID access to the renamed file system. # use the ID to write to the file system. 
@@ -649,7 +700,7 @@ class TestRenameCommand(TestAdminCommands): # cleanup self.mount_a.umount_wait() - self.run_cluster_cmd(f'auth rm client.{client_id}') + self.run_ceph_cmd(f'auth rm client.{client_id}') def test_fs_rename_idempotency(self): """ @@ -661,8 +712,8 @@ class TestRenameCommand(TestAdminCommands): orig_fs_name = self.fs.name new_fs_name = 'new_cephfs' - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') # original file system name does not appear in `fs ls` command self.assertFalse(self.fs.exists()) @@ -681,10 +732,10 @@ class TestRenameCommand(TestAdminCommands): new_fs_name = 'new_cephfs' data_pool = self.fs.get_data_pool_name() metadata_pool = self.fs.get_metadata_pool_name() - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a new file system with old " @@ -694,7 +745,7 @@ class TestRenameCommand(TestAdminCommands): "existing pools to fail.") try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a new file system with old " @@ -704,7 +755,7 @@ class TestRenameCommand(TestAdminCommands): "existing pools, and --force flag to fail.") try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " "--allow-dangerous-metadata-overlay") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, @@ -719,7 +770,7 @@ class TestRenameCommand(TestAdminCommands): That renaming a file system without '--yes-i-really-mean-it' flag fails. """ try: - self.run_cluster_cmd(f"fs rename {self.fs.name} new_fs") + self.run_ceph_cmd(f"fs rename {self.fs.name} new_fs") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a file system without the " @@ -733,7 +784,7 @@ class TestRenameCommand(TestAdminCommands): That renaming a non-existent file system fails. 
""" try: - self.run_cluster_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") + self.run_ceph_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on renaming a non-existent fs") else: @@ -746,7 +797,7 @@ class TestRenameCommand(TestAdminCommands): self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) try: - self.run_cluster_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") + self.run_ceph_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on renaming to a fs name that is already in use") @@ -760,14 +811,14 @@ class TestRenameCommand(TestAdminCommands): orig_fs_name = self.fs.name new_fs_name = 'new_cephfs' - self.run_cluster_cmd(f'fs mirror enable {orig_fs_name}') + self.run_ceph_cmd(f'fs mirror enable {orig_fs_name}') try: - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a mirrored file system") else: self.fail("expected renaming of a mirrored file system to fail") - self.run_cluster_cmd(f'fs mirror disable {orig_fs_name}') + self.run_ceph_cmd(f'fs mirror disable {orig_fs_name}') class TestDump(CephFSTestCase): @@ -851,13 +902,13 @@ class TestRequiredClientFeatures(CephFSTestCase): """ def is_required(index): - out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty') + out = self.get_ceph_cmd_stdout('fs', 'get', self.fs.name, '--format=json-pretty') features = json.loads(out)['mdsmap']['required_client_features'] if "feature_{0}".format(index) in features: return True; return False; - features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty')) + features = json.loads(self.get_ceph_cmd_stdout('fs', 'feature', 'ls', '--format=json-pretty')) self.assertGreater(len(features), 0); for f in features: @@ -1063,7 +1114,7 @@ class TestConfigCommands(CephFSTestCase): names = self.fs.get_rank_names() for n in names: - s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n) + s = self.get_ceph_cmd_stdout("config", "show", "mds."+n) self.assertTrue("NAME" in s) self.assertTrue("mon_host" in s) @@ -1113,17 +1164,17 @@ class TestMirroringCommands(CephFSTestCase): MDSS_REQUIRED = 1 def _enable_mirroring(self, fs_name): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name) + self.run_ceph_cmd("fs", "mirror", "enable", fs_name) def _disable_mirroring(self, fs_name): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", fs_name) def _add_peer(self, fs_name, peer_spec, remote_fs_name): peer_uuid = str(uuid.uuid4()) - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) + self.run_ceph_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) def _remove_peer(self, fs_name, peer_uuid): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) + self.run_ceph_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) def _verify_mirroring(self, fs_name, flag_str): status = self.fs.status() @@ -1250,6 +1301,10 @@ class 
TestFsAuthorize(CephFSTestCase):
         self.captester.run_mds_cap_tests(PERM)
 
     def test_single_path_rootsquash(self):
+        if not isinstance(self.mount_a, FuseMount):
+            self.skipTest("only FUSE client has CEPHFS_FEATURE_MDS_AUTH_CAPS "
+                          "needed to enforce root_squash MDS caps")
+
         PERM = 'rw'
         FS_AUTH_CAPS = (('/', PERM, 'root_squash'),)
         self.captester = CapTester()
@@ -1259,7 +1314,36 @@ class TestFsAuthorize(CephFSTestCase):
         # Since root_squash is set in client caps, client can read but not
         # write even though access level is set to "rw".
         self.captester.conduct_pos_test_for_read_caps()
+        self.captester.conduct_pos_test_for_open_caps()
         self.captester.conduct_neg_test_for_write_caps(sudo_write=True)
+        self.captester.conduct_neg_test_for_chown_caps()
+        self.captester.conduct_neg_test_for_truncate_caps()
+
+    def test_single_path_rootsquash_issue_56067(self):
+        """
+        That a client with root_squash MDS caps can write data to a file as a
+        non-root user, and that after a client remount the non-root user can
+        read back the data it previously wrote.
+        https://tracker.ceph.com/issues/56067
+        """
+        if not isinstance(self.mount_a, FuseMount):
+            self.skipTest("only FUSE client has CEPHFS_FEATURE_MDS_AUTH_CAPS "
+                          "needed to enforce root_squash MDS caps")
+
+        keyring = self.fs.authorize(self.client_id, ('/', 'rw', 'root_squash'))
+        keyring_path = self.mount_a.client_remote.mktemp(data=keyring)
+        self.mount_a.remount(client_id=self.client_id,
+                             client_keyring_path=keyring_path,
+                             cephfs_mntpt='/')
+        filedata, filename = 'some data on fs 1', 'file_on_fs1'
+        filepath = os_path_join(self.mount_a.hostfs_mntpt, filename)
+        self.mount_a.write_file(filepath, filedata)
+
+        self.mount_a.remount(client_id=self.client_id,
+                             client_keyring_path=keyring_path,
+                             cephfs_mntpt='/')
+        if filepath.find(self.mount_a.hostfs_mntpt) != -1:
+            contents = self.mount_a.read_file(filepath)
+            self.assertEqual(filedata, contents)
 
     def test_single_path_authorize_on_nonalphanumeric_fsname(self):
         """
@@ -1271,10 +1355,10 @@ class TestFsAuthorize(CephFSTestCase):
         fs_name = "cephfs-_."
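The tracker-56067 test above reduces to a write, remount, read-back round trip. A self-contained sketch of that shape, with plain local file I/O and a temporary directory standing in for the CephFS mount and remount, so it illustrates only the data round trip and not the root_squash cap enforcement:

    import os
    import tempfile

    def write_then_reread(mntpt, name, data):
        # write under one "mount", then reread after reopening; the real
        # test remounts the CephFS client between these two steps
        path = os.path.join(mntpt, name)
        with open(path, 'w') as f:
            f.write(data)
        with open(path) as f:
            return f.read() == data

    with tempfile.TemporaryDirectory() as d:
        assert write_then_reread(d, 'file_on_fs1', 'some data on fs 1')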
self.fs = self.mds_cluster.newfs(name=fs_name) self.fs.wait_for_daemons() - self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} ' - f'mon "allow r" ' - f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' - f'mds allow') + self.run_ceph_cmd(f'auth caps client.{self.mount_a.client_id} ' + f'mon "allow r" ' + f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' + f'mds allow') self.mount_a.remount(cephfs_name=self.fs.name) PERM = 'rw' FS_AUTH_CAPS = (('/', PERM),) @@ -1303,7 +1387,7 @@ class TestFsAuthorize(CephFSTestCase): self.run_cap_test_one_by_one(FS_AUTH_CAPS) def run_cap_test_one_by_one(self, fs_auth_caps): - keyring = self.run_cluster_cmd(f'auth get {self.client_name}') + keyring = self.run_ceph_cmd(f'auth get {self.client_name}') for i, c in enumerate(fs_auth_caps): self.assertIn(i, (0, 1)) PATH = c[0] @@ -1315,7 +1399,7 @@ class TestFsAuthorize(CephFSTestCase): def tearDown(self): self.mount_a.umount_wait() - self.run_cluster_cmd(f'auth rm {self.client_name}') + self.run_ceph_cmd(f'auth rm {self.client_name}') super(type(self), self).tearDown() @@ -1492,3 +1576,68 @@ class TestFsBalRankMask(CephFSTestCase): self.fs.set_bal_rank_mask(bal_rank_mask) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) + + +class TestPermErrMsg(CephFSTestCase): + + CLIENT_NAME = 'client.testuser' + FS1_NAME, FS2_NAME, FS3_NAME = 'abcd', 'efgh', 'ijkl' + + EXPECTED_ERRNO = 22 + EXPECTED_ERRMSG = ("Permission flags in MDS caps must start with 'r' or " + "'rw' or be '*' or 'all'") + + MONCAP = f'allow r fsname={FS1_NAME}' + OSDCAP = f'allow rw tag cephfs data={FS1_NAME}' + MDSCAPS = [ + 'allow w', + f'allow w fsname={FS1_NAME}', + + f'allow rw fsname={FS1_NAME}, allow w fsname={FS2_NAME}', + f'allow w fsname={FS1_NAME}, allow rw fsname={FS2_NAME}', + f'allow w fsname={FS1_NAME}, allow w fsname={FS2_NAME}', + + (f'allow rw fsname={FS1_NAME}, allow rw fsname={FS2_NAME}, allow ' + f'w fsname={FS3_NAME}'), + + # without space after comma + f'allow rw fsname={FS1_NAME},allow w fsname={FS2_NAME}', + + + 'allow wr', + f'allow wr fsname={FS1_NAME}', + + f'allow rw fsname={FS1_NAME}, allow wr fsname={FS2_NAME}', + f'allow wr fsname={FS1_NAME}, allow rw fsname={FS2_NAME}', + f'allow wr fsname={FS1_NAME}, allow wr fsname={FS2_NAME}', + + (f'allow rw fsname={FS1_NAME}, allow rw fsname={FS2_NAME}, allow ' + f'wr fsname={FS3_NAME}'), + + # without space after comma + f'allow rw fsname={FS1_NAME},allow wr fsname={FS2_NAME}'] + + def _negtestcmd(self, SUBCMD, MDSCAP): + return self.negtest_ceph_cmd( + args=(f'{SUBCMD} {self.CLIENT_NAME} ' + f'mon "{self.MONCAP}" osd "{self.OSDCAP}" mds "{MDSCAP}"'), + retval=self.EXPECTED_ERRNO, errmsgs=self.EXPECTED_ERRMSG) + + def test_auth_add(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth add', mdscap) + + def test_auth_get_or_create(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth get-or-create', mdscap) + + def test_auth_get_or_create_key(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth get-or-create-key', mdscap) + + def test_fs_authorize(self): + for wrong_perm in ('w', 'wr'): + self.negtest_ceph_cmd( + args=(f'fs authorize {self.fs.name} {self.CLIENT_NAME} / ' + f'{wrong_perm}'), retval=self.EXPECTED_ERRNO, + errmsgs=self.EXPECTED_ERRMSG) diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index c4215df33..b76ce4922 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -315,7 +315,7 @@ class 
TestClientLimits(CephFSTestCase):
         self.mount_a.create_n_files("testdir/file2", 5, True)
 
         # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second
-        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10)
+        self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id))
 
     def _test_client_cache_size(self, mount_subdir):
         """
diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py
index 1bd6884a9..a01317065 100644
--- a/qa/tasks/cephfs/test_client_recovery.py
+++ b/qa/tasks/cephfs/test_client_recovery.py
@@ -4,6 +4,7 @@ Teuthology task for exercising CephFS client recovery
 """
 import logging
+import signal
 from textwrap import dedent
 import time
 import distutils.version as version
@@ -12,6 +13,7 @@
 import re
 import string
 import os
+from teuthology import contextutil
 from teuthology.orchestra import run
 from teuthology.exceptions import CommandFailedError
 from tasks.cephfs.fuse_mount import FuseMount
@@ -755,3 +757,117 @@ class TestClientRecovery(CephFSTestCase):
         self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0)
 
         self.mount_a.umount_wait(force=True)
+
+class TestClientOnLaggyOSD(CephFSTestCase):
+    CLIENTS_REQUIRED = 2
+
+    def make_osd_laggy(self, osd, sleep=120):
+        self.mds_cluster.mon_manager.signal_osd(osd, signal.SIGSTOP)
+        time.sleep(sleep)
+        self.mds_cluster.mon_manager.signal_osd(osd, signal.SIGCONT)
+
+    def clear_laggy_params(self, osd):
+        default_laggy_weight = self.config_get('mon', 'mon_osd_laggy_weight')
+        self.config_set('mon', 'mon_osd_laggy_weight', 1)
+        self.mds_cluster.mon_manager.revive_osd(osd)
+        self.config_set('mon', 'mon_osd_laggy_weight', default_laggy_weight)
+
+    def get_a_random_osd(self):
+        osds = self.mds_cluster.mon_manager.get_osd_status()
+        return random.choice(osds['live'])
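make_osd_laggy() freezes the OSD process instead of killing it, so the daemon still counts as up while its heartbeats stall, which is exactly the laggy condition that defer_client_eviction_on_laggy_osds guards against. The same stop/continue pattern against an arbitrary local process, as a sketch in which os.kill on a throwaway child stands in for the harness's signal_osd() helper:

    import os
    import signal
    import subprocess
    import time

    def freeze_briefly(pid, seconds):
        # SIGSTOP suspends the process so it stops responding (heartbeats
        # stall, like a laggy OSD) while still counting as running;
        # SIGCONT resumes it as if nothing happened
        os.kill(pid, signal.SIGSTOP)
        try:
            time.sleep(seconds)
        finally:
            os.kill(pid, signal.SIGCONT)

    # demo against a throwaway child process rather than a real ceph-osd
    child = subprocess.Popen(['sleep', '30'])
    freeze_briefly(child.pid, 2)
    child.terminate()
    child.wait()

Stopping rather than killing matters here: a killed OSD is marked down and the cluster reacts accordingly, while a frozen one stays in the map and is merely observed as laggy.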
+
+    def test_client_eviction_if_config_is_set(self):
+        """
+        If a client becomes unresponsive, or its session goes idle, because
+        of lagginess on some OSD, and the config option
+        defer_client_eviction_on_laggy_osds is set to true (the default),
+        make sure clients are not evicted until the OSD(s) return to normal.
+        """
+
+        self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+        self.config_set('mds', 'defer_client_eviction_on_laggy_osds', 'true')
+        self.assertEqual(self.config_get(
+            'mds', 'defer_client_eviction_on_laggy_osds'), 'true')
+
+        # make an OSD laggy
+        osd = self.get_a_random_osd()
+        self.make_osd_laggy(osd)
+
+        try:
+            mount_a_gid = self.mount_a.get_global_id()
+
+            self.mount_a.kill()
+
+            # the client session should stay open; it becomes stale
+            # only after session_timeout has elapsed
+            self.assert_session_state(mount_a_gid, "open")
+
+            # makes session stale
+            time.sleep(self.fs.get_var("session_timeout") * 1.5)
+            self.assert_session_state(mount_a_gid, "stale")
+
+            # it takes time for laggy client entries to appear in the
+            # cluster log; wait up to 6 minutes for them to become visible,
+            # and finally restart the client
+            with contextutil.safe_while(sleep=5, tries=6) as proceed:
+                while proceed():
+                    try:
+                        with self.assert_cluster_log("1 client(s) laggy due to"
+                                                     " laggy OSDs",
+                                                     timeout=55):
+                            # make sure clients weren't evicted
+                            self.assert_session_count(2)
+                            break
+                    except (AssertionError, CommandFailedError) as e:
+                        log.debug(f'{e}, retrying')
+
+            # clear lagginess, expect the warning to clear, and make sure
+            # the client gets evicted
+            self.clear_laggy_params(osd)
+            self.wait_for_health_clear(60)
+            self.assert_session_count(1)
+        finally:
+            self.mount_a.kill_cleanup()
+            self.mount_a.mount_wait()
+            self.mount_a.create_destroy()
+
+    def test_client_eviction_if_config_is_unset(self):
+        """
+        If an OSD is laggy but the config option
+        defer_client_eviction_on_laggy_osds is set to false, then an
+        unresponsive client does get evicted.
+        """
+
+        self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false'])
+        self.config_set('mds', 'defer_client_eviction_on_laggy_osds', 'false')
+        self.assertEqual(self.config_get(
+            'mds', 'defer_client_eviction_on_laggy_osds'), 'false')
+
+        # make an OSD laggy
+        osd = self.get_a_random_osd()
+        self.make_osd_laggy(osd)
+
+        try:
+            session_timeout = self.fs.get_var("session_timeout")
+            mount_a_gid = self.mount_a.get_global_id()
+
+            self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)])
+
+            self.mount_a.kill()
+
+            self.assert_session_count(2)
+
+            time.sleep(session_timeout * 1.5)
+            self.assert_session_state(mount_a_gid, "open")
+
+            time.sleep(session_timeout)
+            self.assert_session_count(1)
+
+            # make sure the warning wasn't seen in the cluster log
+            with self.assert_cluster_log("laggy due to laggy OSDs",
+                                         timeout=120, present=False):
+                pass
+        finally:
+            self.mount_a.kill_cleanup()
+            self.mount_a.mount_wait()
+            self.mount_a.create_destroy()
+            self.clear_laggy_params(osd)
diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
index bfaa23453..a39ccaa9f 100644
--- a/qa/tasks/cephfs/test_damage.py
+++ b/qa/tasks/cephfs/test_damage.py
@@ -244,7 +244,7 @@ class TestDamage(CephFSTestCase):
                 # Reset MDS state
                 self.mount_a.umount_wait(force=True)
                 self.fs.fail()
-                self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0')
+                self.run_ceph_cmd('mds', 'repaired', '0')
 
                 # Reset RADOS pool state
                 self.fs.radosm(['import', '-'], stdin=BytesIO(serialized))
@@ -355,8 +355,9 @@ class TestDamage(CephFSTestCase):
                 # EIOs mean something handled by DamageTable: assert that it has
                 # been populated
                 damage = json.loads(
-                    self.fs.mon_manager.raw_cluster_cmd(
-                        'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty'))
+                    self.get_ceph_cmd_stdout(
+                        'tell', f'mds.{self.fs.get_active_names()[0]}',
+                        "damage", "ls", '--format=json-pretty'))
                 if len(damage) == 0:
                     results[mutation] = EIO_NO_DAMAGE
@@ -416,8 +417,8 @@ class TestDamage(CephFSTestCase):
 
        # The fact that there is damage should have been recorded
        damage = json.loads(
-            self.fs.mon_manager.raw_cluster_cmd(
-                'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
+            self.get_ceph_cmd_stdout(
+                'tell', f'mds.{self.fs.get_active_names()[0]}',
                 "damage", "ls", '--format=json-pretty'))
        self.assertEqual(len(damage), 1)
        damage_id =
damage[0]['id'] @@ -466,9 +467,9 @@ class TestDamage(CephFSTestCase): self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) # Clean up the damagetable entry - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "rm", "{did}".format(did=damage_id)) + self.run_ceph_cmd( + 'tell', f'mds.{self.fs.get_active_names()[0]}', + "damage", "rm", f"{damage_id}") # Now I should be able to create a file with the same name as the # damaged guy if I want. @@ -520,14 +521,14 @@ class TestDamage(CephFSTestCase): # Check that an entry is created in the damage table damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( + self.get_ceph_cmd_stdout( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) self.assertEqual(len(damage), 1) self.assertEqual(damage[0]['damage_type'], "backtrace") self.assertEqual(damage[0]['ino'], file1_ino) - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "rm", str(damage[0]['id'])) @@ -545,7 +546,7 @@ class TestDamage(CephFSTestCase): # Check that an entry is created in the damage table damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( + self.get_ceph_cmd_stdout( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) self.assertEqual(len(damage), 2) @@ -560,7 +561,7 @@ class TestDamage(CephFSTestCase): self.assertEqual(damage[1]['ino'], file2_ino) for entry in damage: - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "rm", str(entry['id'])) diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py index 9a93bd622..f9f853247 100644 --- a/qa/tasks/cephfs/test_data_scan.py +++ b/qa/tasks/cephfs/test_data_scan.py @@ -428,7 +428,7 @@ class TestDataScan(CephFSTestCase): self.fs.data_scan(["scan_links"]) # Mark the MDS repaired - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + self.run_ceph_cmd('mds', 'repaired', '0') # Start the MDS self.fs.mds_restart() @@ -491,10 +491,11 @@ class TestDataScan(CephFSTestCase): file_count = 100 file_names = ["%s" % n for n in range(0, file_count)] + split_size = 100 * file_count # Make sure and disable dirfrag auto merging and splitting - self.fs.set_ceph_conf('mds', 'mds bal merge size', 0) - self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count) + self.config_set('mds', 'mds_bal_merge_size', 0) + self.config_set('mds', 'mds_bal_split_size', split_size) # Create a directory of `file_count` files, each named after its # decimal number and containing the string of its decimal number @@ -603,7 +604,7 @@ class TestDataScan(CephFSTestCase): file_path = "mydir/myfile_{0}".format(i) ino = self.mount_a.path_to_ino(file_path) obj = "{0:x}.{1:08x}".format(ino, 0) - pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd( + pgid = json.loads(self.get_ceph_cmd_stdout( "osd", "map", self.fs.get_data_pool_name(), obj, "--format=json-pretty" ))['pgid'] diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py index ddcc58ccc..ba2c3f76f 100644 --- a/qa/tasks/cephfs/test_failover.py +++ b/qa/tasks/cephfs/test_failover.py @@ -151,8 +151,39 @@ class TestClusterAffinity(CephFSTestCase): ranks = list(self.fs.get_ranks(status=status)) self.assertEqual(len(ranks), 1) self.assertIn(ranks[0]['name'], standbys) - # Note that we would expect the former active to reclaim its 
spot, but - # we're not testing that here. + + # Wait for the former active to reclaim its spot + def reclaimed(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] not in standbys + + log.info("Waiting for former active to reclaim its spot") + self.wait_until_true(reclaimed, timeout=self.fs.beacon_timeout) + + def test_join_fs_last_resort_refused(self): + """ + That a standby with mds_join_fs set to another fs is not used if refuse_standby_for_another_fs is set. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + fs2 = self.mds_cluster.newfs(name="cephfs2") + for mds in standbys: + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.set_refuse_standby_for_another_fs(True) + self.fs.rank_fail() + status = self.fs.status() + ranks = list(self.fs.get_ranks(status=status)) + self.assertTrue(len(ranks) == 0 or ranks[0]['name'] not in standbys) + + # Wait for the former active to reclaim its spot + def reclaimed(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] not in standbys + + log.info("Waiting for former active to reclaim its spot") + self.wait_until_true(reclaimed, timeout=self.fs.beacon_timeout) def test_join_fs_steady(self): """ @@ -414,7 +445,7 @@ class TestFailover(CephFSTestCase): standbys = self.mds_cluster.get_standby_daemons() self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) # Kill a standby and check for warning victim = standbys.pop() @@ -432,11 +463,11 @@ class TestFailover(CephFSTestCase): # Set it one greater than standbys ever seen standbys = self.mds_cluster.get_standby_daemons() self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) # Set it to 0 - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') self.wait_for_health_clear(timeout=30) def test_discontinuous_mdsmap(self): @@ -685,9 +716,8 @@ class TestMultiFilesystems(CephFSTestCase): def setUp(self): super(TestMultiFilesystems, self).setUp() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", - "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") def _setup_two(self): fs_a = self.mds_cluster.newfs(name="alpha") @@ -701,7 +731,7 @@ class TestMultiFilesystems(CephFSTestCase): # Reconfigure client auth caps for mount in self.mounts: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(mount.client_id), 'mds', 'allow', 'mon', 'allow r', @@ -769,7 +799,7 @@ class TestMultiFilesystems(CephFSTestCase): # Kill fs_a's active MDS, see a standby take over self.mds_cluster.mds_stop(original_a) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) + self.run_ceph_cmd("mds", "fail", original_a) self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 
30, reject_fn=lambda v: v > 1) # Assert that it's a *different* daemon that has now appeared in the map for fs_a @@ -777,7 +807,7 @@ class TestMultiFilesystems(CephFSTestCase): # Kill fs_b's active MDS, see a standby take over self.mds_cluster.mds_stop(original_b) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) + self.run_ceph_cmd("mds", "fail", original_b) self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, reject_fn=lambda v: v > 1) # Assert that it's a *different* daemon that has now appeared in the map for fs_a diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py index f3cec881b..334a73e1c 100644 --- a/qa/tasks/cephfs/test_forward_scrub.py +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -9,6 +9,7 @@ how the functionality responds to damaged metadata. """ import logging import json +import errno from collections import namedtuple from io import BytesIO @@ -46,6 +47,9 @@ class TestForwardScrub(CephFSTestCase): return inos + def _is_MDS_damage(self): + return "MDS_DAMAGE" in self.mds_cluster.mon_manager.get_mon_health()['checks'] + def test_apply_tag(self): self.mount_a.run_shell(["mkdir", "parentdir"]) self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) @@ -305,3 +309,207 @@ class TestForwardScrub(CephFSTestCase): backtrace = self.fs.read_backtrace(file_ino) self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) + + def test_health_status_after_dentry_repair(self): + """ + Test that the damage health status is cleared + after the damaged dentry is repaired + """ + # Create a file for checks + self.mount_a.run_shell(["mkdir", "subdir/"]) + + self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) + self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) + + subdir_ino = self.mount_a.path_to_ino("subdir") + + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + # Corrupt a dentry + junk = "deadbeef" * 10 + dirfrag_obj = "{0:x}.00000000".format(subdir_ino) + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Start up and try to list it + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + dentries = self.mount_a.ls("subdir/") + + # The damaged guy should have disappeared + self.assertEqual(dentries, ["file_undamaged"]) + + # I should get ENOENT if I try and read it normally, because + # the dir is considered complete + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + raise AssertionError("Expected ENOENT") + + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "2") + + self.mount_a.umount_wait() + + out_json = self.fs.run_scrub(["start", "/subdir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry for dentry damage is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "dentry") + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"]) + 
self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) + + self.mount_a.mount_wait() + + # Check that the file count is now correct + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "1") + + # Clean up the omap object + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + def test_health_status_after_dirfrag_repair(self): + """ + Test that the damage health status is cleared + after the damaged dirfrag is repaired + """ + self.mount_a.run_shell(["mkdir", "dir"]) + self.mount_a.run_shell(["touch", "dir/file"]) + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["ln", "dir/file", "testdir/hardlink"]) + + dir_ino = self.mount_a.path_to_ino("dir") + + # Ensure everything is written to backing store + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + # Drop everything from the MDS cache + self.fs.fail() + + self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)]) + + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + out_json = self.fs.run_scrub(["start", "/dir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 3) + damage_types = set() + for i in range(0, 3): + damage_types.add(damage[i]['damage_type']) + self.assertIn("dir_frag", damage_types) + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertNotEqual(damage[0]['damage_type'], "dir_frag") + + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + self.fs.fail() + + # Run cephfs-data-scan + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_links"]) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + 
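Each of the new repair checks follows the same shape: scrub, dump the MDS damage table as JSON, assert on its entries, repair, and assert the table drains. A sketch of the dump-and-filter step, with the `ceph tell mds.<id> damage ls --format=json-pretty` transport replaced by canned output; only the list-of-records shape, with id and damage_type keys, is taken from the tests above:

    import json

    def damage_of_type(damage_ls_json, damage_type):
        """Filter `damage ls` JSON output down to entries of one type."""
        return [d for d in json.loads(damage_ls_json)
                if d.get('damage_type') == damage_type]

    # canned output standing in for:
    #   ceph tell mds.<active> damage ls --format=json-pretty
    canned = json.dumps([
        {"id": 3141592653, "damage_type": "backtrace", "ino": 1099511627776},
        {"id": 2718281828, "damage_type": "dentry", "ino": 1099511627777},
    ])
    assert [d["id"] for d in damage_of_type(canned, "dentry")] == [2718281828]
    # after a successful repair scrub the table should be empty again
    assert damage_of_type(json.dumps([]), "dentry") == []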
self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) + + def test_health_status_after_backtrace_repair(self): + """ + Test that the damage health status is cleared + after the damaged backtrace is repaired + """ + # Create a file for checks + self.mount_a.run_shell(["mkdir", "dir_test"]) + self.mount_a.run_shell(["touch", "dir_test/file"]) + file_ino = self.mount_a.path_to_ino("dir_test/file") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['file', 'dir_test'], + [a['dname'] for a in backtrace['ancestors']]) + + # Corrupt the backtrace + self.fs._write_data_xattr(file_ino, "parent", + "The backtrace is corrupted") + + out_json = self.fs.run_scrub(["start", "/", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry for backtrace damage is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "backtrace") + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/", "repair,recursive,force"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py index 7d35ec0df..902a53e79 100644 --- a/qa/tasks/cephfs/test_fragment.py +++ b/qa/tasks/cephfs/test_fragment.py @@ -160,14 +160,13 @@ class TestFragmentation(CephFSTestCase): target_files = branch_factor**depth * int(split_size * 1.5) create_files = target_files - files_written - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + self.run_ceph_cmd("log", "{0} Writing {1} files (depth={2})".format( self.__class__.__name__, create_files, depth )) self.mount_a.create_n_files("splitdir/file_{0}".format(depth), create_files) - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", - "{0} Done".format(self.__class__.__name__)) + self.run_ceph_cmd("log","{0} Done".format(self.__class__.__name__)) files_written += create_files log.info("Now have {0} files".format(files_written)) diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py index ed76eaac2..09896703d 100644 --- a/qa/tasks/cephfs/test_fstop.py +++ b/qa/tasks/cephfs/test_fstop.py @@ -20,10 +20,10 @@ class TestFSTop(CephFSTestCase): super(TestFSTop, self).tearDown() def _enable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "stats") def _disable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "stats") def _fstop_dump(self, *args): return self.mount_a.run_shell(['cephfs-top', @@ -66,7 +66,7 @@ class TestFSTop(CephFSTestCase): Tests 'cephfs-top --dump' output is valid """ def verify_fstop_metrics(metrics): - clients = 
metrics.get(self.fs.name, {}) + clients = metrics.get('filesystems').get(self.fs.name, {}) if str(self.mount_a.get_global_id()) in clients and \ str(self.mount_b.get_global_id()) in clients: return True @@ -93,8 +93,8 @@ class TestFSTop(CephFSTestCase): # umount mount_b, mount another filesystem on it and use --dumpfs filter self.mount_b.umount_wait() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", "enable_multiple", "true", - "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", "true", + "--yes-i-really-mean-it") # create a new filesystem fs_b = self.mds_cluster.newfs(name=newfs_name) diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py index 2b3a7d5f9..90a65f069 100644 --- a/qa/tasks/cephfs/test_full.py +++ b/qa/tasks/cephfs/test_full.py @@ -61,10 +61,10 @@ class FullnessTestCase(CephFSTestCase): self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) # Set and unset a flag to cause OSD epoch to increment - self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") - self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + self.run_ceph_cmd("osd", "set", "pause") + self.run_ceph_cmd("osd", "unset", "pause") - out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + out = self.get_ceph_cmd_stdout("osd", "dump", "--format=json").strip() new_epoch = json.loads(out)['epoch'] self.assertNotEqual(self.initial_osd_epoch, new_epoch) @@ -138,7 +138,7 @@ class FullnessTestCase(CephFSTestCase): # Wait for the MDS to see the latest OSD map so that it will reliably # be applying the policy of rejecting non-deletion metadata operations # while in the full state. - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + osd_epoch = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['epoch'] self.wait_until_true( lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, timeout=10) @@ -167,7 +167,7 @@ class FullnessTestCase(CephFSTestCase): # Wait for the MDS to see the latest OSD map so that it will reliably # be applying the free space policy - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + osd_epoch = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['epoch'] self.wait_until_true( lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, timeout=10) @@ -376,8 +376,8 @@ class TestQuotaFull(FullnessTestCase): super(TestQuotaFull, self).setUp() pool_name = self.fs.get_data_pool_name() - self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, - "max_bytes", "{0}".format(self.pool_capacity)) + self.run_ceph_cmd("osd", "pool", "set-quota", pool_name, + "max_bytes", f"{self.pool_capacity}") class TestClusterFull(FullnessTestCase): diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py index c5769784d..365140fd9 100644 --- a/qa/tasks/cephfs/test_journal_repair.py +++ b/qa/tasks/cephfs/test_journal_repair.py @@ -233,8 +233,8 @@ class TestJournalRepair(CephFSTestCase): self.fs.table_tool(["0", "reset", "session"]) self.fs.journal_tool(["journal", "reset"], 0) self.fs.erase_mds_objects(1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'reset', self.fs.name, + '--yes-i-really-mean-it') # Bring an MDS back online, mount a client, and see that we can walk the full # filesystem tree 
again diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py index 746c2ffe3..92583b502 100644 --- a/qa/tasks/cephfs/test_mantle.py +++ b/qa/tasks/cephfs/test_mantle.py @@ -22,7 +22,7 @@ class TestMantle(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) def push_balancer(self, obj, lua_code, expect): - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) self.fs.radosm(["put", obj, "-"], stdin=StringIO(lua_code)) with self.assert_cluster_log(failure + obj + " " + expect): log.info("run a " + obj + " balancer that expects=" + expect) @@ -31,16 +31,16 @@ class TestMantle(CephFSTestCase): self.start_mantle() expect = " : (2) No such file or directory" - ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') + ret = self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer') assert(ret == 22) # EINVAL - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") with self.assert_cluster_log(failure + " " + expect): pass def test_version_not_in_rados(self): self.start_mantle() expect = failure + "ghost.lua : (2) No such file or directory" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") with self.assert_cluster_log(expect): pass def test_balancer_invalid(self): @@ -59,7 +59,7 @@ class TestMantle(CephFSTestCase): def test_balancer_valid(self): self.start_mantle() lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") self.fs.radosm(["put", "valid.lua", "-"], stdin=StringIO(lua_code)) with self.assert_cluster_log(success + "valid.lua"): log.info("run a valid.lua balancer") @@ -94,13 +94,13 @@ class TestMantle(CephFSTestCase): expect = " : (110) Connection timed out" # kill the OSDs so that the balancer pull from RADOS times out - osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) + osd_map = json.loads(self.get_ceph_cmd_stdout('osd', 'dump', '--format=json-pretty')) for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) + self.get_ceph_cmd_result('osd', 'down', str(i)) + self.get_ceph_cmd_result('osd', 'out', str(i)) # trigger a pull from RADOS - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") # make the timeout a little longer since dead OSDs spam ceph -w with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): @@ -108,4 +108,4 @@ class TestMantle(CephFSTestCase): # cleanup for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i)) + self.get_ceph_cmd_result('osd', 'in', str(i)) diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py index ad877f622..0e824d3d2 100644 --- a/qa/tasks/cephfs/test_mds_metrics.py +++ b/qa/tasks/cephfs/test_mds_metrics.py @@ -57,13 +57,13 @@ class TestMDSMetrics(CephFSTestCase): return 
verify_metrics_cbk def _fs_perf_stats(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", *args) + return self.get_ceph_cmd_stdout("fs", "perf", "stats", *args) def _enable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "stats") def _disable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "stats") def _spread_directory_on_all_ranks(self, fscid): fs_status = self.fs.status() @@ -115,7 +115,7 @@ class TestMDSMetrics(CephFSTestCase): # Reconfigure client auth caps for mount in self.mounts: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', f"client.{mount.client_id}", 'mds', 'allow', 'mon', 'allow r', @@ -404,7 +404,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_mds_rank = "1," # try, 'fs perf stat' command with invalid mds_rank try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) + self.run_ceph_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -415,7 +415,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_client_id = "abcd" # try, 'fs perf stat' command with invalid client_id try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) + self.run_ceph_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -426,7 +426,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_client_ip = "1.2.3" # try, 'fs perf stat' command with invalid client_ip try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) + self.run_ceph_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -501,8 +501,8 @@ class TestMDSMetrics(CephFSTestCase): self.mount_b.umount_wait() self.fs.delete_all_filesystems() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") # creating filesystem fs_a = self._setup_fs(fs_name="fs1") @@ -569,8 +569,8 @@ class TestMDSMetrics(CephFSTestCase): self.mount_a.umount_wait() self.mount_b.umount_wait() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") # creating filesystem fs_b = self._setup_fs(fs_name="fs2") diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py index c1a940e3f..6e57df5d0 100644 --- a/qa/tasks/cephfs/test_mirroring.py +++ b/qa/tasks/cephfs/test_mirroring.py @@ -21,6 +21,10 @@ class TestMirroring(CephFSTestCase): MODULE_NAME = "mirroring" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR = "cephfs_mirror" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS = "cephfs_mirror_mirrored_filesystems" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER = "cephfs_mirror_peers" + def setUp(self): super(TestMirroring, self).setUp() self.primary_fs_name = self.fs.name @@ -34,13 +38,16 @@ class TestMirroring(CephFSTestCase): 
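Most of the mirroring changes below wrap an operation in the same before/after comparison against the daemon's labelled `counter dump` output. A sketch of that pattern as a context manager, with the counter fetch stubbed by in-memory state; the payload shape, a dict of lists of {labels, counters} records, follows the accesses made in the diff rather than any documented schema:

    from contextlib import contextmanager

    # canned daemon state standing in for the mirror daemon's counters
    STATE = {"cephfs_mirror": [{"labels": {},
                                "counters": {"mirrored_filesystems": 0}}]}

    def counter_dump():
        # stands in for: mirror_daemon_command(..., 'counter', 'dump');
        # copies so a 'before' snapshot is not mutated by later updates
        entry = STATE["cephfs_mirror"][0]
        return {"cephfs_mirror": [{"labels": dict(entry["labels"]),
                                   "counters": dict(entry["counters"])}]}

    @contextmanager
    def expect_increase(key, counter):
        before = counter_dump()[key][0]["counters"][counter]
        yield
        after = counter_dump()[key][0]["counters"][counter]
        assert after > before, f"{counter} did not increase ({before} -> {after})"

    with expect_increase("cephfs_mirror", "mirrored_filesystems"):
        # stands in for: run_ceph_cmd('fs', 'snapshot', 'mirror', 'enable', name)
        STATE["cephfs_mirror"][0]["counters"]["mirrored_filesystems"] += 1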
super(TestMirroring, self).tearDown() def enable_mirroring_module(self): - self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", TestMirroring.MODULE_NAME) + self.run_ceph_cmd("mgr", "module", "enable", TestMirroring.MODULE_NAME) def disable_mirroring_module(self): - self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) + self.run_ceph_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) def enable_mirroring(self, fs_name, fs_id): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "enable", fs_name) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.run_ceph_cmd("fs", "snapshot", "mirror", "enable", fs_name) time.sleep(10) # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', @@ -48,8 +55,20 @@ class TestMirroring(CephFSTestCase): self.assertTrue(res['peers'] == {}) self.assertTrue(res['snap_dirs']['dir_count'] == 0) + # verify labelled perf counter + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + self.assertEqual(res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0]["labels"]["filesystem"], + fs_name) + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.assertGreater(vafter["counters"]["mirrored_filesystems"], + vbefore["counters"]["mirrored_filesystems"]) + def disable_mirroring(self, fs_name, fs_id): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "disable", fs_name) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.run_ceph_cmd("fs", "snapshot", "mirror", "disable", fs_name) time.sleep(10) # verify via asok try: @@ -60,6 +79,13 @@ class TestMirroring(CephFSTestCase): else: raise RuntimeError('expected admin socket to be unavailable') + # verify labelled perf counter + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.assertLess(vafter["counters"]["mirrored_filesystems"], + vbefore["counters"]["mirrored_filesystems"]) + def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None): # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', @@ -74,40 +100,62 @@ class TestMirroring(CephFSTestCase): else: self.assertTrue(self.fs_name == res['peers'][peer_uuid]['remote']['fs_name']) - def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None, check_perf_counter=True): + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + if remote_fs_name: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) else: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) time.sleep(10) self.verify_peer_added(fs_name, fs_id, peer_spec, remote_fs_name) + if check_perf_counter: + res = 
self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + self.assertGreater(vafter["counters"]["mirroring_peers"], vbefore["counters"]["mirroring_peers"]) + def peer_remove(self, fs_name, fs_id, peer_spec): + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + peer_uuid = self.get_peer_uuid(peer_spec) - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) time.sleep(10) # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') self.assertTrue(res['peers'] == {} and res['snap_dirs']['dir_count'] == 0) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + + self.assertLess(vafter["counters"]["mirroring_peers"], vbefore["counters"]["mirroring_peers"]) + def bootstrap_peer(self, fs_name, client_name, site_name): - outj = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( - "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, client_name, site_name)) + outj = json.loads(self.get_ceph_cmd_stdout( + "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, + client_name, site_name)) return outj['token'] def import_peer(self, fs_name, token): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_bootstrap", "import", - fs_name, token) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_bootstrap", + "import", fs_name, token) + + def add_directory(self, fs_name, fs_id, dir_name, check_perf_counter=True): + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] - def add_directory(self, fs_name, fs_id, dir_name): # get initial dir count res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') dir_count = res['snap_dirs']['dir_count'] log.debug(f'initial dir_count={dir_count}') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) time.sleep(10) # verify via asok @@ -117,14 +165,21 @@ class TestMirroring(CephFSTestCase): log.debug(f'new dir_count={new_dir_count}') self.assertTrue(new_dir_count > dir_count) + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + self.assertGreater(vafter["counters"]["directory_count"], vbefore["counters"]["directory_count"]) + def remove_directory(self, fs_name, fs_id, dir_name): + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] # get initial dir count res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') dir_count = res['snap_dirs']['dir_count'] log.debug(f'initial dir_count={dir_count}') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", fs_name, 
dir_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "remove", fs_name, dir_name) time.sleep(10) # verify via asok @@ -134,6 +189,11 @@ class TestMirroring(CephFSTestCase): log.debug(f'new dir_count={new_dir_count}') self.assertTrue(new_dir_count < dir_count) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + + self.assertLess(vafter["counters"]["directory_count"], vbefore["counters"]["directory_count"]) + def check_peer_status(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name, expected_snap_count): peer_uuid = self.get_peer_uuid(peer_spec) @@ -234,7 +294,7 @@ class TestMirroring(CephFSTestCase): return json.loads(res) def get_mirror_daemon_status(self): - daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status")) + daemon_status = json.loads(self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "daemon", "status")) log.debug(f'daemon_status: {daemon_status}') # running a single mirror daemon is supported status = daemon_status[0] @@ -267,7 +327,7 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a matching remote peer') @@ -281,7 +341,7 @@ class TestMirroring(CephFSTestCase): # and explicitly specifying the spec (via filesystem name) should fail too try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a matching remote peer') @@ -302,7 +362,7 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) # adding the same peer should be idempotent - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name, check_perf_counter=False) # remove peer self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") @@ -312,7 +372,7 @@ class TestMirroring(CephFSTestCase): def test_peer_commands_with_mirroring_disabled(self): # try adding peer when mirroring is not enabled try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer') @@ -321,7 +381,7 @@ class TestMirroring(CephFSTestCase): # try removing peer try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') except 
CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer') @@ -331,7 +391,7 @@ class TestMirroring(CephFSTestCase): def test_add_directory_with_mirroring_disabled(self): # try adding a directory when mirroring is not enabled try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1") + self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1", check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') @@ -343,7 +403,7 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1', check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EEXIST: raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') @@ -363,7 +423,7 @@ class TestMirroring(CephFSTestCase): def test_add_relative_directory_path(self): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1', check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a relative path dir') @@ -377,7 +437,7 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3') def check_add_command_failure(dir_path): try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EEXIST: raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') @@ -401,7 +461,7 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/') def check_add_command_failure(dir_path): try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') @@ -466,12 +526,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_stats(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -485,6 +546,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for 
fs: {self.primary_fs_name}', 'counter', 'dump') + first = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -493,6 +558,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + second = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(second["counters"]["snaps_synced"], first["counters"]["snaps_synced"]) + # some more IO self.mount_a.run_shell(["mkdir", "d0/d00"]) self.mount_a.run_shell(["mkdir", "d0/d01"]) @@ -508,6 +578,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap1', 2) self.verify_snapshot('d0', 'snap1') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + third = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(third["counters"]["snaps_synced"], second["counters"]["snaps_synced"]) + # delete a snapshot self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) @@ -516,6 +591,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap0' not in snap_list) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fourth["counters"]["snaps_deleted"], third["counters"]["snaps_deleted"]) # rename a snapshot self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) @@ -526,18 +605,23 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap2' in snap_list) self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_renamed + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fifth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fifth["counters"]["snaps_renamed"], fourth["counters"]["snaps_renamed"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_cancel_sync(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -564,16 +648,23 @@ class TestMirroring(CephFSTestCase): snap_list = self.mount_b.ls(path='d0/.snap') self.assertTrue('snap0' not in snap_list) + + # check sync_failures + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vmirror_peers["counters"]["sync_failures"], 0) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def 
test_cephfs_mirror_restart_sync_on_blocklist(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -592,6 +683,10 @@ class TestMirroring(CephFSTestCase): # fetch rados address for blacklist check rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -620,6 +715,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -628,6 +727,10 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # add a non-existent directory for synchronization self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') @@ -644,6 +747,10 @@ class TestMirroring(CephFSTestCase): time.sleep(120) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_service_daemon_status(self): @@ -697,8 +804,8 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring_module() # enable mirroring through mon interface -- this should result in the mirror daemon - # failing to enable mirroring due to absence of `cephfs_mirorr` index object. - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + # failing to enable mirroring due to absence of `cephfs_mirror` index object. 
+ self.run_ceph_cmd("fs", "mirror", "enable", self.primary_fs_name) with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: while proceed(): @@ -713,7 +820,7 @@ class TestMirroring(CephFSTestCase): except: pass - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", self.primary_fs_name) time.sleep(10) # verify via asok try: @@ -735,7 +842,7 @@ class TestMirroring(CephFSTestCase): # enable mirroring through mon interface -- this should result in the mirror daemon # failing to enable mirroring due to absence of `cephfs_mirror` index object. - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "enable", self.primary_fs_name) # need safe_while since non-failed status pops up as mirroring is restarted # internally in mirror daemon. with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed: @@ -766,7 +873,7 @@ class TestMirroring(CephFSTestCase): self.assertTrue(res['peers'] == {}) self.assertTrue(res['snap_dirs']['dir_count'] == 0) - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", self.primary_fs_name) time.sleep(10) # verify via asok try: @@ -792,9 +899,8 @@ class TestMirroring(CephFSTestCase): # verify via peer_list interface peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote") - res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) + res = json.loads(self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) self.assertTrue(peer_uuid in res) - self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '') # remove peer self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote") @@ -803,12 +909,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_symlink_sync(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -825,6 +932,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -833,6 +944,10 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) 
self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -844,12 +959,20 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create snapshots in parent directories self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) @@ -861,12 +984,20 @@ class TestMirroring(CephFSTestCase): time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) time.sleep(15) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_deleted"], vthird["counters"]["snaps_deleted"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -893,20 +1024,20 @@ class TestMirroring(CephFSTestCase): dir_path_p = "/d0/d1" dir_path = "/d0/d1/d2" - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) time.sleep(10) # this uses an undocumented interface to get dirpath map state - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'stalled') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) time.sleep(10) try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) except 
CommandFailedError as ce: if ce.exitstatus != errno.ENOENT: raise RuntimeError('invalid errno when checking dirmap status for non-existent directory') @@ -914,11 +1045,11 @@ class TestMirroring(CephFSTestCase): raise RuntimeError('incorrect errno when checking dirmap state for non-existent directory') # adding a parent directory should be allowed - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) time.sleep(10) # however, this directory path should get stalled too - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'stalled') @@ -930,7 +1061,7 @@ class TestMirroring(CephFSTestCase): # wait for restart mirror on blocklist time.sleep(60) - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'mapped') @@ -940,12 +1071,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_incremental_sync(self): """ Test incremental snapshot synchronization (based on mtime differences).""" log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -969,6 +1101,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -976,6 +1111,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(5, 20) @@ -988,6 +1127,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: 
{self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) # diff again, this time back to HEAD log.debug('resetting to HEAD') @@ -999,6 +1141,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) self.verify_snapshot(repo_path, 'snap_c') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_synced"], vthird["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1018,12 +1163,13 @@ class TestMirroring(CephFSTestCase): file_z | sym dir reg sym """ log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -1068,11 +1214,18 @@ class TestMirroring(CephFSTestCase): while turns != len(typs): snapname = f'snap_{turns}' cleanup_and_create_with_type('d0', fnames) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', snapname, turns+1) verify_types('d0', fnames, snapname) + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + # next type typs.rotate(1) turns += 1 @@ -1089,12 +1242,13 @@ class TestMirroring(CephFSTestCase): """ log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -1118,6 +1272,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', 
f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1125,6 +1282,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(60, 100) @@ -1141,6 +1301,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1151,7 +1314,7 @@ class TestMirroring(CephFSTestCase): # try adding the primary file system as a peer to secondary file # system try: - self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a primary file system') @@ -1169,12 +1332,13 @@ class TestMirroring(CephFSTestCase): that all replayer threads (3 by default) in the mirror daemon are busy. 
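# The docstring above calls for keeping all replayer threads (3 by default,
# per its own wording) busy at once. A standalone sketch of the queueing
# behaviour that implies: with 3 worker slots, a 4th directory sync only
# starts once a slot frees. sync_dir is a stand-in, not the daemon's
# internals.
import time
from concurrent.futures import ThreadPoolExecutor

def sync_dir(name):
    time.sleep(0.2)  # pretend to copy a snapshot
    return name

with ThreadPoolExecutor(max_workers=3) as pool:  # "replayer" slots
    futures = [pool.submit(sync_dir, f"/d{i}") for i in range(4)]
    results = [f.result() for f in futures]
print(results)  # /d3 ran only after one of /d0../d2 finished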
""" log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -1198,6 +1362,9 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] # take snapshots log.debug('taking snapshots') self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -1259,6 +1426,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d2', 'snap0', 1) self.verify_snapshot('d2', 'snap0') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + self.assertGreater(vafter["counters"]["snaps_deleted"], vbefore["counters"]["snaps_deleted"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1266,7 +1437,7 @@ class TestMirroring(CephFSTestCase): log.debug('reconfigure client auth caps') cid = self.mount_b.client_id data_pool = self.backup_fs.get_data_pool_name() - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', f"client.{cid}", 'mds', 'allow rw', 'mon', 'allow r', @@ -1287,6 +1458,11 @@ class TestMirroring(CephFSTestCase): time.sleep(60) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/l1', 'snap0', 1) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + snaps_synced = vmirror_peers["counters"]["snaps_synced"] + self.assertEqual(snaps_synced, 1, f"Mismatch snaps_synced: {snaps_synced} vs 1") mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() @@ -1296,3 +1472,13 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) self.mount_a.run_shell(["rmdir", "l1/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "l1"]) + + def test_get_set_mirror_dirty_snap_id(self): + """ + That get/set ceph.mirror.dirty_snap_id attribute succeeds in a remote filesystem. 
+ """ + self.mount_b.run_shell(["mkdir", "-p", "d1/d2/d3"]) + attr = str(random.randint(1, 10)) + self.mount_b.setfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id", attr) + val = self.mount_b.getfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id") + self.assertEqual(attr, val, f"Mismatch for ceph.mirror.dirty_snap_id value: {attr} vs {val}") diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 8b48dee69..72468a813 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -96,16 +96,15 @@ class TestMisc(CephFSTestCase): self.fs.fail() - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'rm', self.fs.name, '--yes-i-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - '--pg_num_min', str(self.fs.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.run_ceph_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) # insert a garbage object self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar")) @@ -119,34 +118,34 @@ class TestMisc(CephFSTestCase): self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name) + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: raise AssertionError("Expected EINVAL") - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name, "--force") + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, "--force") - self.fs.mon_manager.raw_cluster_cmd('fs', 'fail', self.fs.name) + self.run_ceph_cmd('fs', 'fail', self.fs.name) - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - '--pg_num_min', str(self.fs.pg_num_min)) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name, - '--allow_dangerous_metadata_overlay') + self.run_ceph_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.run_ceph_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, + '--allow_dangerous_metadata_overlay') def test_cap_revoke_nonresponder(self): """ @@ -199,9 +198,8 @@ class TestMisc(CephFSTestCase): pool_name = self.fs.get_data_pool_name() raw_df = self.fs.get_pool_df(pool_name) raw_avail = float(raw_df["max_avail"]) - out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', - pool_name, 'size', - '-f', 'json-pretty') + out = 
self.get_ceph_cmd_stdout('osd', 'pool', 'get', pool_name, + 'size', '-f', 'json-pretty') _ = json.loads(out) proc = self.mount_a.run_shell(['df', '.']) @@ -210,18 +208,39 @@ class TestMisc(CephFSTestCase): fs_avail = float(fs_avail) * 1024 ratio = raw_avail / fs_avail - assert 0.9 < ratio < 1.1 + self.assertTrue(0.9 < ratio < 1.1) def test_dump_inode(self): info = self.fs.mds_asok(['dump', 'inode', '1']) - assert(info['path'] == "/") + self.assertEqual(info['path'], "/") def test_dump_inode_hexademical(self): self.mount_a.run_shell(["mkdir", "-p", "foo"]) ino = self.mount_a.path_to_ino("foo") - assert type(ino) is int + self.assertTrue(type(ino) is int) info = self.fs.mds_asok(['dump', 'inode', hex(ino)]) - assert info['path'] == "/foo" + self.assertEqual(info['path'], "/foo") + + def test_dump_dir(self): + self.mount_a.run_shell(["mkdir", "-p", "foo/bar"]) + dirs = self.fs.mds_asok(['dump', 'dir', '/foo']) + self.assertTrue(type(dirs) is list) + for dir in dirs: + self.assertEqual(dir['path'], "/foo") + self.assertFalse("dentries" in dir) + dirs = self.fs.mds_asok(['dump', 'dir', '/foo', '--dentry_dump']) + self.assertTrue(type(dirs) is list) + found_dentry = False + for dir in dirs: + self.assertEqual(dir['path'], "/foo") + self.assertTrue(type(dir['dentries']) is list) + if found_dentry: + continue + for dentry in dir['dentries']: + if dentry['path'] == "foo/bar": + found_dentry = True + break + self.assertTrue(found_dentry) def test_fs_lsflags(self): """ @@ -232,9 +251,8 @@ class TestMisc(CephFSTestCase): self.fs.set_allow_new_snaps(False) self.fs.set_allow_standby_replay(True) - lsflags = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'lsflags', - self.fs.name, - "--format=json-pretty")) + lsflags = json.loads(self.get_ceph_cmd_stdout( + 'fs', 'lsflags', self.fs.name, "--format=json-pretty")) self.assertEqual(lsflags["joinable"], False) self.assertEqual(lsflags["allow_snaps"], False) self.assertEqual(lsflags["allow_multimds_snaps"], True) @@ -258,30 +276,30 @@ class TestMisc(CephFSTestCase): self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() if file_sync: - self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript], timeout=4) else: - self.mount_a.run_shell(["sync"]) + self.mount_a.run_shell(["sync"], timeout=4) + # the real duration should be less than the rough one duration = time.time() - start - log.info(f"sync mkdir i = {i}, duration = {duration}") - self.assertLess(duration, 4) + log.info(f"sync mkdir i = {i}, rough duration = {duration}") for j in range(5): self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() if file_sync: - self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript], timeout=4) else: - self.mount_a.run_shell(["sync"]) + self.mount_a.run_shell(["sync"], timeout=4) + # the real duration should be less than the rough one duration = time.time() - start - log.info(f"sync rmdir i = {i}, duration = {duration}") - self.assertLess(duration, 4) + log.info(f"sync rmdir i = {i}, rough duration = {duration}") self.mount_a.run_shell(["rm", "-rf", dir_path]) def test_filesystem_sync_stuck_for_around_5s(self): """ - To check whether the fsync will be stuck to wait for the mdlog to be - flushed for at most 5 seconds. + To check whether the filesystem sync will be stuck to wait for the + mdlog to be flushed for at most 5 seconds. 
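# The two sync tests above were reworked to let the command itself enforce
# the bound (run_shell(..., timeout=4)) instead of asserting on a measured
# wall-clock duration afterwards. The same idea with plain subprocess:
import subprocess

try:
    subprocess.run(["sync"], check=True, timeout=4)
except subprocess.TimeoutExpired:
    raise AssertionError("sync stalled for more than 4 seconds")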
""" dir_path = "filesystem_sync_do_not_wait_mdlog_testdir" @@ -289,8 +307,8 @@ class TestMisc(CephFSTestCase): def test_file_sync_stuck_for_around_5s(self): """ - To check whether the filesystem sync will be stuck to wait for the - mdlog to be flushed for at most 5 seconds. + To check whether the fsync will be stuck to wait for the mdlog to + be flushed for at most 5 seconds. """ dir_path = "file_sync_do_not_wait_mdlog_testdir" @@ -404,7 +422,7 @@ class TestMisc(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) try: - mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + mons = json.loads(self.get_ceph_cmd_stdout('mon', 'dump', '-f', 'json'))['mons'] except: self.assertTrue(False, "Error fetching monitors") @@ -447,7 +465,7 @@ class TestMisc(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'mds_heartbeat_grace', '1']) self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) try: - mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + mons = json.loads(self.get_ceph_cmd_stdout('mon', 'dump', '-f', 'json'))['mons'] except: self.assertTrue(False, "Error fetching monitors") diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py index c9ea5f528..592a84164 100644 --- a/qa/tasks/cephfs/test_multifs_auth.py +++ b/qa/tasks/cephfs/test_multifs_auth.py @@ -26,15 +26,15 @@ class TestMultiFS(CephFSTestCase): # we might have it - the client - if the same cluster was used for a # different vstart_runner.py run. - self.run_cluster_cmd(f'auth rm {self.client_name}') + self.run_ceph_cmd(f'auth rm {self.client_name}') self.fs1 = self.fs self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) # we'll reassign caps to client.1 so that it can operate with cephfs2 - self.run_cluster_cmd(f'auth caps client.{self.mount_b.client_id} mon ' - f'"allow r" osd "allow rw ' - f'pool={self.fs2.get_data_pool_name()}" mds allow') + self.run_ceph_cmd(f'auth caps client.{self.mount_b.client_id} mon ' + f'"allow r" osd "allow rw ' + f'pool={self.fs2.get_data_pool_name()}" mds allow') self.mount_b.remount(cephfs_name=self.fs2.name) @@ -209,54 +209,16 @@ class TestMDSCaps(TestMultiFS): class TestClientsWithoutAuth(TestMultiFS): + # c.f., src/mount/mtab.c: EX_FAIL + RETVAL_KCLIENT = 32 + # c.f., src/ceph_fuse.cc: (cpp EXIT_FAILURE). Normally the check for this + # case should be anything-except-0, but EXIT_FAILURE is 1 in most systems. + RETVAL_USER_SPACE_CLIENT = 1 def setUp(self): super(TestClientsWithoutAuth, self).setUp() - - # TODO: When MON and OSD caps for a Ceph FS are assigned to a - # client but MDS caps are not, mount.ceph prints "permission - # denied". But when MON caps are not assigned and MDS and OSD - # caps are, mount.ceph prints "no mds server or cluster laggy" - # instead of "permission denied". - # - # Before uncommenting the following line a fix would be required - # for latter case to change "no mds server is up or the cluster is - # laggy" to "permission denied". - self.kernel_errmsgs = ('permission denied', 'no mds server is up or ' - 'the cluster is laggy', 'no such file or ' - 'directory', - 'input/output error') - - # TODO: When MON and OSD caps are assigned for a Ceph FS to a - # client but MDS caps are not, ceph-fuse prints "operation not - # permitted". 
But when MON caps are not assigned and MDS and OSD - # caps are, ceph-fuse prints "no such file or directory" instead - # of "operation not permitted". - # - # Before uncommenting the following line a fix would be required - # for the latter case to change "no such file or directory" to - # "operation not permitted". - #self.assertIn('operation not permitted', retval[2].lower()) - self.fuse_errmsgs = ('operation not permitted', 'no such file or ' - 'directory') - - if 'kernel' in str(type(self.mount_a)).lower(): - self.errmsgs = self.kernel_errmsgs - elif 'fuse' in str(type(self.mount_a)).lower(): - self.errmsgs = self.fuse_errmsgs - else: - raise RuntimeError('strange, the client was neither based on ' - 'kernel nor FUSE.') - - def check_that_mount_failed_for_right_reason(self, stderr): - stderr = stderr.lower() - for errmsg in self.errmsgs: - if errmsg in stderr: - break - else: - raise AssertionError('can\'t find expected set of words in the ' - f'stderr\nself.errmsgs - {self.errmsgs}\n' - f'stderr - {stderr}') + self.retval = self.RETVAL_KCLIENT if 'kernel' in str(type(self.mount_a)).lower() \ + else self.RETVAL_USER_SPACE_CLIENT def test_mount_all_caps_absent(self): # setup part... @@ -264,16 +226,13 @@ class TestClientsWithoutAuth(TestMultiFS): keyring_path = self.mount_a.client_remote.mktemp(data=keyring) # mount the FS for which client has no auth... - retval = self.mount_a.remount(client_id=self.client_id, - client_keyring_path=keyring_path, - cephfs_name=self.fs2.name, - check_status=False) - - # tests... - self.assertIsInstance(retval, tuple) - self.assertEqual(len(retval), 3) - self.assertIsInstance(retval[0], CommandFailedError) - self.check_that_mount_failed_for_right_reason(retval[2]) + try: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, self.retval) def test_mount_mon_and_osd_caps_present_mds_caps_absent(self): # setup part... @@ -285,13 +244,10 @@ class TestClientsWithoutAuth(TestMultiFS): keyring_path = self.mount_a.client_remote.mktemp(data=keyring) # mount the FS for which client has no auth... - retval = self.mount_a.remount(client_id=self.client_id, - client_keyring_path=keyring_path, - cephfs_name=self.fs2.name, - check_status=False) - - # tests... 
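# The stderr-substring matching removed here gives way to plain exit-code
# checks (see the RETVAL_* constants added above). A standalone sketch
# using the codes quoted in the new comments -- 32 (EX_FAIL) for mount.ceph,
# 1 for ceph-fuse; 'false' is only a placeholder for a mount invocation
# that is expected to fail.
import subprocess

RETVAL_KCLIENT = 32
RETVAL_USER_SPACE_CLIENT = 1

proc = subprocess.run(["false"])  # placeholder failing command, exits 1
assert proc.returncode == RETVAL_USER_SPACE_CLIENT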
- self.assertIsInstance(retval, tuple) - self.assertEqual(len(retval), 3) - self.assertIsInstance(retval[0], CommandFailedError) - self.check_that_mount_failed_for_right_reason(retval[2]) + try: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, self.retval) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py index 2bb6257c7..e0e46fb24 100644 --- a/qa/tasks/cephfs/test_multimds_misc.py +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -116,7 +116,7 @@ class TestScrub2(CephFSTestCase): def expect_exdev(cmd, mds): try: - self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) + self.run_ceph_cmd('tell', 'mds.{0}'.format(mds), *cmd) except CommandFailedError as e: if e.exitstatus == errno.EXDEV: pass diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py index 0a10709e6..2d06cbac7 100644 --- a/qa/tasks/cephfs/test_nfs.py +++ b/qa/tasks/cephfs/test_nfs.py @@ -16,16 +16,14 @@ NFS_POOL_NAME = '.nfs' # should match mgr_module.py # TODO Add test for cluster update when ganesha can be deployed on multiple ports. class TestNFS(MgrTestCase): def _cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + return self.get_ceph_cmd_stdout(args) def _nfs_cmd(self, *args): return self._cmd("nfs", *args) def _nfs_complete_cmd(self, cmd): - return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}", - stdout=StringIO(), - stderr=StringIO(), - check_status=False) + return self.run_ceph_cmd(args=f"nfs {cmd}", stdout=StringIO(), + stderr=StringIO(), check_status=False) def _orch_cmd(self, *args): return self._cmd("orch", *args) @@ -142,7 +140,7 @@ class TestNFS(MgrTestCase): :param cmd_args: nfs command arguments to be run ''' cmd_func() - ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args) + ret = self.get_ceph_cmd_result(*cmd_args) if ret != 0: self.fail("Idempotency test failed") @@ -406,6 +404,13 @@ class TestNFS(MgrTestCase): self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') self._test_delete_cluster() + def _nfs_export_apply(self, cluster, exports, raise_on_error=False): + return self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + cluster, '-i', '-'], + check_status=raise_on_error, + stdin=json.dumps(exports), + stdout=StringIO(), stderr=StringIO()) + def test_create_and_delete_cluster(self): ''' Test successful creation and deletion of the nfs cluster. @@ -878,3 +883,258 @@ class TestNFS(MgrTestCase): raise self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}/*']) self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_apply_multiple_exports(self): + """ + Test multiple export creation/update with multiple + export blocks provided in the json/conf file using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file>, and check + 1) if there are multiple failure: + -> Return the EIO and error status to CLI (along with JSON output + containing status of every export). + 2) if there is single failure: + -> Return the respective errno and error status to CLI (along with + JSON output containing status of every export). 
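# Shape of the batch call these NFS tests drive: a JSON list of export
# blocks piped to `ceph nfs export apply <cluster> -i -`. Cluster name,
# paths, user_id and fs_name below are placeholders; per the docstring,
# multiple failed blocks yield EIO while a single failure returns that
# block's own errno, with per-export status JSON on stdout either way.
import json
import subprocess

exports = [{
    "export_id": 1,
    "path": "/testdir1",
    "pseudo": "/cephfs1",
    "squash": "none",
    "access_type": "rw",
    "protocols": [4],
    "fsal": {"name": "CEPH", "user_id": "nfs.test.1", "fs_name": "myfs"},
}]
proc = subprocess.run(["ceph", "nfs", "export", "apply", "test", "-i", "-"],
                      input=json.dumps(exports), text=True,
                      capture_output=True)
print(proc.returncode, proc.stdout)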
+ """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir3']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + self._create_export(export_id='2', + extra_cmd=['--pseudo-path', + self.pseudo_path+'2', + '--path', '/testdir2']) + exports = [ + { + "export_id": 11, # export_id change not allowed + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name + } + }, + { + "export_id": 2, + "path": "/testdir2", + "pseudo": self.pseudo_path+'2', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.2", + "fs_name": "invalid_fs_name" # invalid fs + } + }, + { # no error, export creation should succeed + "export_id": 3, + "path": "/testdir3", + "pseudo": self.pseudo_path+'3', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.3", + "fs_name": self.fs_name + } + } + ] + + # multiple failures + ret = self._nfs_export_apply(self.cluster_id, exports) + self.assertEqual(ret[0].returncode, errno.EIO) + self.assertIn("2 export blocks (at index 1, 2) failed to be " + "created/updated", ret[0].stderr.getvalue()) + + # single failure + exports[1]["fsal"]["fs_name"] = self.fs_name # correct the fs + ret = self._nfs_export_apply(self.cluster_id, exports) + self.assertEqual(ret[0].returncode, errno.EINVAL) + self.assertIn("Export ID changed, Cannot update export for " + "export block at index 1", ret[0].stderr.getvalue()) + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_nfs_export_apply_single_export(self): + """ + Test that when single export creation/update fails with multiple + export blocks provided in the json/conf file using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file>, it + returns the respective errno and error status to CLI (along with + JSON output containing status of every export). + """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + export = { + "export_id": 1, + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": "invalid_fs_name" # invalid fs + } + } + ret = self._nfs_export_apply(self.cluster_id, export) + self.assertEqual(ret[0].returncode, errno.ENOENT) + self.assertIn("filesystem invalid_fs_name not found for " + "export block at index 1", ret[0].stderr.getvalue()) + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_nfs_export_apply_json_output_states(self): + """ + If export creation/update is done using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file> then the + "status" field in the json output maybe added, updated, error or + warning. 
Test different scenarios to make sure these states are + in the json output as expected. + """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir3']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + exports = [ + { # change pseudo, state should be "updated" + "export_id": 1, + "path": "/testdir1", + "pseudo": self.pseudo_path+'1', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name + } + }, + { # a new export, state should be "added" + "export_id": 2, + "path": "/testdir2", + "pseudo": self.pseudo_path+'2', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.2", + "fs_name": self.fs_name + } + }, + { # error in export block, state should be "error" since the + # fs_name is invalid + "export_id": 3, + "path": "/testdir3", + "pseudo": self.pseudo_path+'3', + "squash": "none", + "access_type": "RW", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.3", + "fs_name": "invalid_fs_name" + } + } + ] + ret = self._nfs_export_apply(self.cluster_id, exports) + json_output = json.loads(ret[0].stdout.getvalue().strip()) + self.assertEqual(len(json_output), 3) + self.assertEqual(json_output[0]["state"], "updated") + self.assertEqual(json_output[1]["state"], "added") + self.assertEqual(json_output[2]["state"], "error") + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_pseudo_path_in_json_response_when_updating_exports_failed(self): + """ + Test that on export update/creation failure while using + ceph nfs export apply <nfs_cluster> -i <json/conf>, the failed + exports pseudo paths are visible in the JSON response to CLI and the + return code is set to EIO. 
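# Reading the per-export "state" field those assertions check; the sample
# payload is hand-written to mirror the three outcomes the test sets up.
import json

out = ('[{"pseudo": "/cephfs1", "state": "updated"},'
       ' {"pseudo": "/cephfs2", "state": "added"},'
       ' {"pseudo": "/cephfs3", "state": "error"}]')
statuses = json.loads(out)
assert [s["state"] for s in statuses] == ["updated", "added", "error"]
# failed blocks still appear in the list, which is how the next test can
# report their pseudo paths alongside the EIO return code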
+ """ + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path]) + + ret = self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + check_status=False, + stdin=json.dumps([ + { + "export_id": 11, # change not allowed + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": self.fs_name + } + }, + { + "path": "/testdir2", + "pseudo": self.pseudo_path+'1', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": "foo" # invalid fs + } + }]), + stdout=StringIO(), stderr=StringIO()) + + try: + # EIO since multiple exports failure (first export failed to be + # modified while the second one failed to be created) + self.assertEqual(ret[0].returncode, errno.EIO) + err_info = ret[0].stdout + if err_info: + update_details = json.loads(err_info.getvalue()) + self.assertEqual(update_details[0]["pseudo"], self.pseudo_path) + self.assertEqual(update_details[1]["pseudo"], self.pseudo_path+'1') + else: + self.fail("Could not retrieve any export update data") + + # verify second export wasn't created + exports = json.loads(self._nfs_cmd('export', 'ls', + self.cluster_id, '--detailed')) + self.assertEqual(len(exports), 1) + + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py index 9912debed..b55052b82 100644 --- a/qa/tasks/cephfs/test_pool_perm.py +++ b/qa/tasks/cephfs/test_pool_perm.py @@ -30,9 +30,9 @@ class TestPoolPerm(CephFSTestCase): client_name = "client.{0}".format(self.mount_a.client_id) # set data pool read only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow r pool={0}'.format(self.fs.get_data_pool_name())) + self.get_ceph_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', + 'osd', 'allow r pool={0}'.format(self.fs.get_data_pool_name())) self.mount_a.umount_wait() self.mount_a.mount_wait() @@ -41,9 +41,9 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) # set data pool write only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow w pool={0}'.format(self.fs.get_data_pool_name())) + self.get_ceph_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', + 'osd', 'allow w pool={0}'.format(self.fs.get_data_pool_name())) self.mount_a.umount_wait() self.mount_a.mount_wait() @@ -66,7 +66,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.run_shell(["mkdir", "layoutdir"]) # Set MDS 'rw' perms: missing 'p' means no setting pool layouts - self.fs.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0},allow rw pool={1}'.format( @@ -86,7 +86,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.umount_wait() # Set MDS 'rwp' perms: should now be able to set layouts - self.fs.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 
'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', 'osd', 'allow rw pool={0},allow rw pool={1}'.format(
@@ -101,7 +101,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.umount_wait() def tearDown(self):
- self.fs.mon_manager.raw_cluster_cmd_result(
+ self.get_ceph_cmd_result(
'auth', 'caps', "client.{0}".format(self.mount_a.client_id), 'mds', 'allow', 'mon', 'allow r', 'osd', 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0]))
diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py
index 0386672bd..b5691c838 100644
--- a/qa/tasks/cephfs/test_quota.py
+++ b/qa/tasks/cephfs/test_quota.py
@@ -104,3 +104,59 @@ class TestQuota(CephFSTestCase): with self.assertRaises(CommandFailedError): self.mount_b.write_n_mb("subdir_data/file", 40)
+ def test_human_readable_quota_values(self):
+ """
+ test human-readable values for setting ceph.quota.max_bytes
+ """
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ self.assertEqual(self.mount_a.getfattr("./subdir",
+ "ceph.quota.max_bytes"), None)
+
+ readable_values = {"10K": "10240",
+ "100Ki": "102400",
+ "10M": "10485760",
+ "100Mi": "104857600",
+ "2G": "2147483648",
+ "4Gi": "4294967296",
+ "1T": "1099511627776",
+ "2Ti": "2199023255552"}
+ for readable_value in readable_values:
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ readable_value)
+ self.assertEqual(self.mount_a.getfattr(
+ "./subdir", "ceph.quota.max_bytes"),
+ readable_values.get(readable_value))
+
+ def test_human_readable_quota_invalid_values(self):
+ """
+ test invalid values for ceph.quota.max_bytes
+ """
+
+ self.mount_a.run_shell(["mkdir", "subdir"])
+
+ invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1"]
+ for invalid_value in invalid_values:
+ with self.assertRaises(CommandFailedError):
+ self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes",
+ invalid_value)
+
+ def test_disable_enable_human_readable_quota_values(self):
+ """
+ test:
+ 1) disabling ceph.quota.max_bytes using a byte value.
+ 2) enabling it again using a human-readable value.
+ 3) disabling it again, but using a human-readable value.
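+ Note: per the expected values above, every suffix is interpreted as
+ a power of 1024 whether or not it carries an "i" (e.g. "1K" stores
+ 1024, "10M" stores 10485760), and a zero value ("0" or "0M") clears
+ the quota, so a subsequent getfattr returns None.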
+ """ + + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "0") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), None) + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "1K") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), "1024") + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "0M") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), None) diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py index bbcdf9769..17669c0f2 100644 --- a/qa/tasks/cephfs/test_recovery_fs.py +++ b/qa/tasks/cephfs/test_recovery_fs.py @@ -27,7 +27,7 @@ class TestFSRecovery(CephFSTestCase): # recovered/intact self.fs.rm() # Recreate file system with pool and previous fscid - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'fs', 'new', self.fs.name, metadata_pool, data_pool, '--recover', '--force', '--fscid', f'{self.fs.id}') self.fs.set_joinable() diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py index 8c4e1967d..7aef28229 100644 --- a/qa/tasks/cephfs/test_recovery_pool.py +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -119,7 +119,7 @@ class TestRecoveryPool(CephFSTestCase): recovery_fs.create(recover=True, metadata_overlay=True) recovery_pool = recovery_fs.get_metadata_pool_name() - recovery_fs.mon_manager.raw_cluster_cmd('-s') + self.run_ceph_cmd('-s') # Reset the MDS map in case multiple ranks were in play: recovery procedure # only understands how to rebuild metadata under rank 0 diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py index e41b997a6..f17a6ceb1 100644 --- a/qa/tasks/cephfs/test_scrub_checks.py +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -281,8 +281,8 @@ class TestScrubChecks(CephFSTestCase): all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype] for d in damage: - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]), + self.run_ceph_cmd( + 'tell', f'mds.{self.fs.get_active_names()[mds_rank]}', "damage", "rm", str(d['id'])) return len(damage) > 0 diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py index ad6fd1d60..b3b88af72 100644 --- a/qa/tasks/cephfs/test_sessionmap.py +++ b/qa/tasks/cephfs/test_sessionmap.py @@ -158,7 +158,7 @@ class TestSessionMap(CephFSTestCase): if mon_caps is None: mon_caps = "allow r" - out = self.fs.mon_manager.raw_cluster_cmd( + out = self.get_ceph_cmd_stdout( "auth", "get-or-create", "client.{name}".format(name=id_name), "mds", mds_caps, "osd", osd_caps, diff --git a/qa/tasks/cephfs/test_snap_schedules.py b/qa/tasks/cephfs/test_snap_schedules.py index 0264cac32..8bbd679ef 100644 --- a/qa/tasks/cephfs/test_snap_schedules.py +++ b/qa/tasks/cephfs/test_snap_schedules.py @@ -3,6 +3,7 @@ import json import time import errno import logging +import uuid from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError @@ -28,6 +29,29 @@ class TestSnapSchedulesHelper(CephFSTestCase): # this should be in sync with snap_schedule format SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S' + def remove_snapshots(self, dir_path, sdn): + snap_path = f'{dir_path}/{sdn}' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + if snapshot.startswith("_scheduled"): + continue + 
snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['sudo', 'rmdir', snapshot_path]) + + def get_snap_dir_name(self): + from .fuse_mount import FuseMount + from .kernel_mount import KernelMount + + if isinstance(self.mount_a, KernelMount): + sdn = self.mount_a.client_config.get('snapdirname', '.snap') + elif isinstance(self.mount_a, FuseMount): + sdn = self.mount_a.client_config.get('client_snapdir', '.snap') + self.fs.set_ceph_conf('client', 'client snapdir', sdn) + self.mount_a.remount() + return sdn + def check_scheduled_snapshot(self, exec_time, timo): now = time.time() delta = now - exec_time @@ -36,7 +60,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.assertTrue((delta <= timo + 5) and (delta >= timo - 5)) def _fs_cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + return self.get_ceph_cmd_stdout("fs", *args) def fs_snap_schedule_cmd(self, *args, **kwargs): if 'fs' in kwargs: @@ -61,10 +85,10 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.volname = result[0]['name'] def _enable_snap_schedule(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "snap_schedule") def _disable_snap_schedule(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "snap_schedule") def _allow_minute_granularity_snapshots(self): self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True) @@ -94,7 +118,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): def _schedule_to_timeout(self, schedule): mult = schedule[-1] period = int(schedule[0:-1]) - if mult == 'M': + if mult == 'm': return period * 60 elif mult == 'h': return period * 60 * 60 @@ -102,6 +126,10 @@ class TestSnapSchedulesHelper(CephFSTestCase): return period * 60 * 60 * 24 elif mult == 'w': return period * 60 * 60 * 24 * 7 + elif mult == 'M': + return period * 60 * 60 * 24 * 30 + elif mult == 'Y': + return period * 60 * 60 * 24 * 365 else: raise RuntimeError('schedule multiplier not recognized') @@ -166,7 +194,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.assertTrue(schedule in json_res['schedule']) for retention in retentions: self.assertTrue(retention in json_res['retention']) - + class TestSnapSchedules(TestSnapSchedulesHelper): def remove_snapshots(self, dir_path): snap_path = f'{dir_path}/.snap' @@ -224,15 +252,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') exec_time = time.time() - timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...') to_wait = timo + 2 # some leeway to avoid false failures... 
# verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M']) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m']) def verify_added(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -260,18 +288,18 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set schedules on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2m') exec_time = time.time() - timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...') - timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M') + timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...') to_wait = timo_2 + 2 # use max timeout # verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M']) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m', '2m']) def verify_added_1(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -309,16 +337,16 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...') to_wait = timo_1 + 2 # some leeway to avoid false failures... 
# verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m'], retentions=[{'m':1}]) def verify_added(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -400,26 +428,26 @@ class TestSnapSchedules(TestSnapSchedulesHelper): for d in testdirs: self.mount_a.run_shell(['mkdir', '-p', d[1:]]) - self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') for d in testdirs: - self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1m') # we wait for 10 snaps to be taken wait_time = timo_1 + 10 * 60 + 15 time.sleep(wait_time) for d in testdirs: - self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1m') for d in testdirs: self.verify_snap_stats(d) for d in testdirs: - self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1m') self.remove_snapshots(d[1:]) self.mount_a.run_shell(['rmdir', d[1:]]) @@ -428,12 +456,12 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart") self.mount_a.run_shell(['mkdir', '-p', testdir[1:]]) - self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') - self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1m') # we wait for 10 snaps to be taken wait_time = timo_1 + 10 * 60 + 15 @@ -448,7 +476,7 @@ class TestSnapSchedules(TestSnapSchedulesHelper): log.debug(f'restarting active mgr: {active_mgr}') self.mgr_cluster.mon_manager.revive_mgr(active_mgr) time.sleep(300) # sleep for 5 minutes - self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1m') new_stats = self.get_snap_stats(testdir) self.assertTrue(new_stats['fs_count'] == new_stats['db_count']) @@ -456,15 +484,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.assertTrue(new_stats['db_count'] > old_stats['db_count']) # cleanup - self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1m') self.remove_snapshots(testdir[1:]) - self.mount_a.run_shell(['rmdir', testdir[1:]]) + self.mount_a.run_shell(['rmdir', testdir[1:]]) def test_schedule_auto_deactivation_for_non_existent_path(self): """ Test that a non-existent path leads to schedule deactivation after a few retries. 
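The snap_schedule mgr module is expected to flip the schedule to
inactive on its own; the body below therefore retries the check for
up to 60 seconds instead of asserting immediately.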
""" - self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1m') start_time = time.time() while time.time() - start_time < 60.0: @@ -491,15 +519,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', test_dir[1:]]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1m') self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, retention_spec_or_period=f'{total_snaps}n') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') # verify snapshot schedule - self.verify_schedule(test_dir, ['1M']) + self.verify_schedule(test_dir, ['1m']) # we wait for total_snaps snaps to be taken wait_time = timo_1 + total_snaps * 60 + 15 @@ -517,45 +545,513 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['rmdir', test_dir[1:]]) + def test_snap_schedule_all_periods(self): + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/minutes" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1m') -class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): - def remove_snapshots(self, dir_path, sdn): - snap_path = f'{dir_path}/{sdn}' + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/hourly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1h') - snapshots = self.mount_a.ls(path=snap_path) - for snapshot in snapshots: - snapshot_path = os.path.join(snap_path, snapshot) - log.debug(f'removing snapshot: {snapshot_path}') - self.mount_a.run_shell(['rmdir', snapshot_path]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/daily" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1d') - def get_snap_dir_name(self): - from tasks.cephfs.fuse_mount import FuseMount - from tasks.cephfs.kernel_mount import KernelMount + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/weekly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1w') - if isinstance(self.mount_a, KernelMount): - sdn = self.mount_a.client_config.get('snapdirname', '.snap') - elif isinstance(self.mount_a, FuseMount): - sdn = self.mount_a.client_config.get('client_snapdir', '.snap') - self.fs.set_ceph_conf('client', 'client snapdir', sdn) - self.mount_a.remount() - return sdn + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/monthly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/yearly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1Y') + + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/bad_period_spec" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1X') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1MM') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1') + 
with self.assertRaises(CommandFailedError):
+ self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='M')
+ with self.assertRaises(CommandFailedError):
+ self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='-1m')
+ with self.assertRaises(CommandFailedError):
+ self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='')
+
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/minutes"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/hourly"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/daily"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/weekly"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/monthly"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/yearly"
+ self.mount_a.run_shell(['rmdir', test_dir])
+ test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/bad_period_spec"
+ self.mount_a.run_shell(['rmdir', test_dir])
+
+
+class TestSnapSchedulesSubvolAndGroupArguments(TestSnapSchedulesHelper):
+ def setUp(self):
+ super(TestSnapSchedulesSubvolAndGroupArguments, self).setUp()
+ self.CREATE_VERSION = int(self.mount_a.ctx['config']['overrides']['subvolume_version'])
+
+ def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=False, subvol_type='subvolume', state='complete'):
+ group = subvol_group if subvol_group is not None else '_nogroup'
+ basepath = os.path.join("volumes", group, subvol_name)
+ uuid_str = str(uuid.uuid4())
+ createpath = os.path.join(basepath, uuid_str)
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False)
+ self.mount_a.setfattr(createpath, 'ceph.dir.subvolume', '1', sudo=True)
+
+ # create a v1 snapshot, to prevent auto upgrades
+ if has_snapshot:
+ snappath = os.path.join(createpath, self.get_snap_dir_name(), "fake")
+ self.mount_a.run_shell(['sudo', 'mkdir', '-p', snappath], omit_sudo=False)
+
+ # add required xattrs to subvolume
+ default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool")
+ self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True)
+
+ # create a v1 .meta file
+ cp = "/" + createpath
+ meta_contents = f"[GLOBAL]\nversion = 1\ntype = {subvol_type}\npath = {cp}\nstate = {state}\n"
+ meta_contents += "allow_subvolume_upgrade = 0\n" # boolean
+ if state == 'pending':
+ # add a fake clone source
+ meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n'
+ meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta")
+ self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True)
+ return createpath
+
+ def _create_subvolume(self, version, subvol_name, subvol_group=None):
+ if version == 1:
+ self._create_v1_subvolume(subvol_name, subvol_group)
+ elif version >= 2:
+ if subvol_group:
+ self._fs_cmd('subvolume', 'create', 'cephfs', subvol_name, '--group_name', subvol_group)
+ else:
+ self._fs_cmd('subvolume', 'create', 'cephfs', subvol_name)
+ else:
+ self.fail(f'unsupported subvolume version {version} for create')
+
+ def _get_subvol_snapdir_path(self, version, subvol, group):
+ args = ['subvolume', 'getpath', 'cephfs', subvol]
+ if group:
+ args += ['--group_name', group]
+
+ path = self.get_ceph_cmd_stdout("fs", *args).rstrip()
+ if version >= 2:
+ path += "/.."
+ return path[1:] + + def _verify_snap_schedule(self, version, subvol, group): + time.sleep(75) + path = self._get_subvol_snapdir_path(version, subvol, group) + path += "/" + self.get_snap_dir_name() + snaps = self.mount_a.ls(path=path) + log.debug(f"snaps:{snaps}") + count = 0 + for snapname in snaps: + if snapname.startswith("scheduled-"): + count += 1 + # confirm presence of snapshot dir under .snap dir + self.assertGreater(count, 0) + + def test_snap_schedule_subvol_and_group_arguments_01(self): + """ + Test subvol schedule creation succeeds for default subvolgroup. + """ + self._create_subvolume(self.CREATE_VERSION, 'sv01') + self.fs_snap_schedule_cmd('add', '--subvol', 'sv01', path='.', snap_schedule='1m') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv01', None) + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv01', None) + self.remove_snapshots(path, self.get_snap_dir_name()) + + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv01', path='.', snap_schedule='1m') + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv01') + def test_snap_schedule_subvol_and_group_arguments_02(self): + """ + Test subvol schedule creation fails for non-default subvolgroup. + """ + self._create_subvolume(self.CREATE_VERSION, 'sv02') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', '--subvol', 'sv02', '--group', 'mygrp02', path='.', snap_schedule='1m') + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv02') + + def test_snap_schedule_subvol_and_group_arguments_03(self): + """ + Test subvol schedule creation fails when subvol exists only under default group. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp03') + self._create_subvolume(self.CREATE_VERSION, 'sv03', 'mygrp03') + + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', '--subvol', 'sv03', path='.', snap_schedule='1m') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv03', '--group_name', 'mygrp03') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp03') + + def test_snap_schedule_subvol_and_group_arguments_04(self): + """ + Test subvol schedule creation fails without subvol argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp04') + self._create_subvolume(self.CREATE_VERSION, 'sv04', 'mygrp04') + + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', '--group', 'mygrp04', path='.', snap_schedule='1m') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv04', '--group_name', 'mygrp04') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp04') + + def test_snap_schedule_subvol_and_group_arguments_05(self): + """ + Test subvol schedule creation succeeds for a subvol under a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp05') + self._create_subvolume(self.CREATE_VERSION, 'sv05', 'mygrp05') + self.fs_snap_schedule_cmd('add', '--subvol', 'sv05', '--group', 'mygrp05', path='.', snap_schedule='1m', fs='cephfs') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv05', 'mygrp05') + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv05', 'mygrp05') + self.remove_snapshots(path, self.get_snap_dir_name()) + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv05', '--group_name', 'mygrp05') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp05') + + def test_snap_schedule_subvol_and_group_arguments_06(self): + """ + Test subvol schedule listing fails without a subvolgroup argument. 
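+ (Listing here passes only "--subvol sv06"; without "--group mygrp06"
+ the subvolume cannot be located in its non-default group, so the
+ command is expected to raise CommandFailedError.)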
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp06') + self._create_subvolume(self.CREATE_VERSION, 'sv06', 'mygrp06') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv06', '--group', 'mygrp06', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('list', '--subvol', 'sv06', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv06', '--group', 'mygrp06', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv06', '--group_name', 'mygrp06') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp06') + + def test_snap_schedule_subvol_and_group_arguments_07(self): + """ + Test subvol schedule listing fails without a subvol argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp07') + self._create_subvolume(self.CREATE_VERSION, 'sv07', 'mygrp07') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv07', '--group', 'mygrp07', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('list', '--group', 'mygrp07', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv07', '--group', 'mygrp07', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv07', '--group_name', 'mygrp07') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp07') + + def test_snap_schedule_subvol_and_group_arguments_08(self): + """ + Test subvol schedule listing succeeds with a subvol and a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp08') + self._create_subvolume(self.CREATE_VERSION, 'sv08', 'mygrp08') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv08', '--group', 'mygrp08', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('list', '--subvol', 'sv08', '--group', 'mygrp08', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv08', '--group', 'mygrp08', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv08', '--group_name', 'mygrp08') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp08') + + def test_snap_schedule_subvol_and_group_arguments_09(self): + """ + Test subvol schedule retention add fails for a subvol without a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp09') + self._create_subvolume(self.CREATE_VERSION, 'sv09', 'mygrp09') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv09', '--group', 'mygrp09', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv09', path='.', retention_spec_or_period='h', retention_count='5') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv09', '--group', 'mygrp09', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv09', '--group_name', 'mygrp09') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp09') + + def test_snap_schedule_subvol_and_group_arguments_10(self): + """ + Test subvol schedule retention add fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp10') + self._create_subvolume(self.CREATE_VERSION, 'sv10', 'mygrp10') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv10', '--group', 'mygrp10', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'add', '--group', 'mygrp10', path='.', retention_spec_or_period='h', retention_count='5') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv10', '--group', 'mygrp10', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv10', '--group_name', 'mygrp10') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp10') + + def test_snap_schedule_subvol_and_group_arguments_11(self): + """ + Test subvol schedule retention add succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp11') + self._create_subvolume(self.CREATE_VERSION, 'sv11', 'mygrp11') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv11', '--group', 'mygrp11', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv11', '--group', 'mygrp11', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv11', '--group', 'mygrp11', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv11', '--group_name', 'mygrp11') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp11') + + def test_snap_schedule_subvol_and_group_arguments_12(self): + """ + Test subvol schedule activation fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp12') + self._create_subvolume(self.CREATE_VERSION, 'sv12', 'mygrp12') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv12', '--group', 'mygrp12', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv12', '--group', 'mygrp12', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv12', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv12', '--group', 'mygrp12', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv12', '--group_name', 'mygrp12') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp12') + + def test_snap_schedule_subvol_and_group_arguments_13(self): + """ + Test subvol schedule activation fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp13') + self._create_subvolume(self.CREATE_VERSION, 'sv13', 'mygrp13') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv13', '--group', 'mygrp13', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv13', '--group', 'mygrp13', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('activate', '--group', 'mygrp13', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv13', '--group', 'mygrp13', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv13', '--group_name', 'mygrp13') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp13') + + def test_snap_schedule_subvol_and_group_arguments_14(self): + """ + Test subvol schedule activation succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp14') + self._create_subvolume(self.CREATE_VERSION, 'sv14', 'mygrp14') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv14', '--group', 'mygrp14', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv14', '--group', 'mygrp14', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv14', '--group', 'mygrp14', path='.', fs='cephfs') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv14', 'mygrp14') + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv14', 'mygrp14') + self.remove_snapshots(path, self.get_snap_dir_name()) + + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv14', '--group', 'mygrp14', path='.', snap_schedule='1m', fs='cephfs') + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv14', '--group_name', 'mygrp14') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp14') + + def test_snap_schedule_subvol_and_group_arguments_15(self): + """ + Test subvol schedule deactivation fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp15') + self._create_subvolume(self.CREATE_VERSION, 'sv15', 'mygrp15') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv15', '--group', 'mygrp15', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv15', '--group', 'mygrp15', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv15', '--group', 'mygrp15', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv15', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv15', '--group', 'mygrp15', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv15', '--group_name', 'mygrp15') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp15') + + def test_snap_schedule_subvol_and_group_arguments_16(self): + """ + Test subvol schedule deactivation fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp16') + self._create_subvolume(self.CREATE_VERSION, 'sv16', 'mygrp16') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv16', '--group', 'mygrp16', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv16', '--group', 'mygrp16', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv16', '--group', 'mygrp16', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('deactivate', '--group', 'mygrp16', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv16', '--group', 'mygrp16', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv16', '--group_name', 'mygrp16') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp16') + + def test_snap_schedule_subvol_and_group_arguments_17(self): + """ + Test subvol schedule deactivation succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp17') + self._create_subvolume(self.CREATE_VERSION, 'sv17', 'mygrp17') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv17', '--group', 'mygrp17', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv17', '--group', 'mygrp17', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv17', '--group', 'mygrp17', path='.', fs='cephfs') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv17', 'mygrp17') + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv17', 'mygrp17') + self.remove_snapshots(path, self.get_snap_dir_name()) + + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv17', '--group', 'mygrp17', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv17', '--group', 'mygrp17', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv17', '--group_name', 'mygrp17') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp17') + + def test_snap_schedule_subvol_and_group_arguments_18(self): + """ + Test subvol schedule retention remove fails for a subvol without a subvolgroup argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp18') + self._create_subvolume(self.CREATE_VERSION, 'sv18', 'mygrp18') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv18', '--group', 'mygrp18', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv18', '--group', 'mygrp18', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv18', '--group', 'mygrp18', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv18', '--group', 'mygrp18', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv18', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv18', '--group', 'mygrp18', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv18', '--group_name', 'mygrp18') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp18') + + def test_snap_schedule_subvol_and_group_arguments_19(self): + """ + Test subvol schedule retention remove fails for a subvol without a subvol argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp19') + self._create_subvolume(self.CREATE_VERSION, 'sv19', 'mygrp19') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv19', '--group', 'mygrp19', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv19', '--group', 'mygrp19', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv19', '--group', 'mygrp19', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv19', '--group', 'mygrp19', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'remove', '--group', 'mygrp19', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv19', '--group', 'mygrp19', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv19', '--group_name', 'mygrp19') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp19') + + def test_snap_schedule_subvol_and_group_arguments_20(self): + """ + Test subvol schedule retention remove succeeds for a subvol within a subvolgroup. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp20') + self._create_subvolume(self.CREATE_VERSION, 'sv20', 'mygrp20') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv20', '--group', 'mygrp20', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv20', '--group', 'mygrp20', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv20', '--group', 'mygrp20', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv20', '--group', 'mygrp20', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv20', '--group', 'mygrp20', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv20', '--group', 'mygrp20', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv20', '--group_name', 'mygrp20') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp20') + + def test_snap_schedule_subvol_and_group_arguments_21(self): + """ + Test subvol schedule remove fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp21') + self._create_subvolume(self.CREATE_VERSION, 'sv21', 'mygrp21') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv21', '--group', 'mygrp21', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv21', '--group', 'mygrp21', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv21', '--group', 'mygrp21', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv21', '--group', 'mygrp21', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv21', '--group', 'mygrp21', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv21', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv21', '--group', 'mygrp21', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv21', '--group_name', 'mygrp21') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp21') + + def test_snap_schedule_subvol_and_group_arguments_22(self): + """ + Test subvol schedule remove fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp22') + self._create_subvolume(self.CREATE_VERSION, 'sv22', 'mygrp22') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv22', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv22', '--group', 'mygrp22', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv22', '--group', 'mygrp22', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv22', '--group', 'mygrp22', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv22', '--group', 'mygrp22', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('remove', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv22', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv22', '--group_name', 'mygrp22') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp22') + + def test_snap_schedule_subvol_and_group_arguments_23(self): + """ + Test subvol schedule remove succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp23') + self._create_subvolume(self.CREATE_VERSION, 'sv23', 'mygrp23') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv23', '--group', 'mygrp23', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv23', '--group', 'mygrp23', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv23', '--group', 'mygrp23', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv23', '--group', 'mygrp23', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv23', '--group', 'mygrp23', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv23', '--group', 'mygrp23', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv23', '--group_name', 'mygrp23') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp23') + + +class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): def test_snap_dir_name(self): """Test the correctness of snap directory name""" self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1m') exec_time = time.time() - timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') sdn = self.get_snap_dir_name() log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...') - + # verify snapshot schedule - self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) - 
+ self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1m'], retentions=[{'m':1}]) + # remove snapshot schedule self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY) diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py index 608dcc81f..a9639a7eb 100644 --- a/qa/tasks/cephfs/test_snapshots.py +++ b/qa/tasks/cephfs/test_snapshots.py @@ -553,12 +553,12 @@ class TestMonSnapsAndFsPools(CephFSTestCase): with self.assertRaises(CommandFailedError): test_pool_name = self.fs.get_data_pool_name() base_cmd = f'osd pool mksnap {test_pool_name} snap3' - self.run_cluster_cmd(base_cmd) + self.run_ceph_cmd(base_cmd) with self.assertRaises(CommandFailedError): test_pool_name = self.fs.get_metadata_pool_name() base_cmd = f'osd pool mksnap {test_pool_name} snap4' - self.run_cluster_cmd(base_cmd) + self.run_ceph_cmd(base_cmd) def test_attaching_pools_with_snaps_to_fs_fails(self): """ @@ -566,40 +566,40 @@ class TestMonSnapsAndFsPools(CephFSTestCase): """ test_pool_name = 'snap-test-pool' base_cmd = f'osd pool create {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) self.fs.rados(["mksnap", "snap3"], pool=test_pool_name) base_cmd = f'fs add_data_pool {self.fs.name} {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, errno.EOPNOTSUPP) # cleanup self.fs.rados(["rmsnap", "snap3"], pool=test_pool_name) base_cmd = f'osd pool delete {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) def test_using_pool_with_snap_fails_fs_creation(self): """ Test that using a pool with snaps for fs creation fails """ base_cmd = 'osd pool create test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) base_cmd = 'osd pool create test_metadata_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) self.fs.rados(["mksnap", "snap4"], pool='test_data_pool') base_cmd = 'fs new testfs test_metadata_pool test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, errno.EOPNOTSUPP) # cleanup self.fs.rados(["rmsnap", "snap4"], pool='test_data_pool') base_cmd = 'osd pool delete test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) base_cmd = 'osd pool delete test_metadata_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py index 8bdc126e2..11701dc28 100644 --- a/qa/tasks/cephfs/test_strays.py +++ b/qa/tasks/cephfs/test_strays.py @@ -651,9 +651,8 @@ class TestStrays(CephFSTestCase): self.assertFalse(self._is_stopped(1)) # Permit the daemon to start purging again - self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), - 'injectargs', - "--mds_max_purge_files 100") + self.run_ceph_cmd('tell', 'mds.{0}'.format(rank_1_id), + 'injectargs', "--mds_max_purge_files 100") # It should now proceed through shutdown self.fs.wait_for_daemons(timeout=120) @@ -816,7 +815,7 @@ touch pin/placeholder 
:param pool_name: Which pool (must exist) """
- out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")
+ out = self.get_ceph_cmd_stdout("df", "--format=json-pretty")
for p in json.loads(out)['pools']: if p['name'] == pool_name: return p['stats']
diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py
index 2ecfeb327..612a4ef41 100644
--- a/qa/tasks/cephfs/test_volumes.py
+++ b/qa/tasks/cephfs/test_volumes.py
@@ -19,11 +19,6 @@ log = logging.getLogger(__name__) class TestVolumesHelper(CephFSTestCase): """Helper class for testing FS volume, subvolume group and subvolume operations."""
- TEST_VOLUME_PREFIX = "volume"
- TEST_SUBVOLUME_PREFIX="subvolume"
- TEST_GROUP_PREFIX="group"
- TEST_SNAPSHOT_PREFIX="snapshot"
- TEST_CLONE_PREFIX="clone"
TEST_FILE_NAME_PREFIX="subvolume_file" # for filling subvolume with data
@@ -35,10 +30,10 @@ class TestVolumesHelper(CephFSTestCase): DEFAULT_NUMBER_OF_FILES = 1024 def _fs_cmd(self, *args):
- return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args)
+ return self.get_ceph_cmd_stdout("fs", *args)
def _raw_cmd(self, *args):
- return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args)
+ return self.get_ceph_cmd_stdout(*args)
def __check_clone_state(self, state, clone, clone_group=None, timo=120): check = 0
@@ -165,35 +160,24 @@ class TestVolumesHelper(CephFSTestCase): self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) self._verify_clone_attrs(path1, path2)
- def _generate_random_volume_name(self, count=1):
- n = self.volume_start
- volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
- self.volume_start += count
- return volumes[0] if count == 1 else volumes
-
- def _generate_random_subvolume_name(self, count=1):
- n = self.subvolume_start
- subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)]
- self.subvolume_start += count
- return subvolumes[0] if count == 1 else subvolumes
-
- def _generate_random_group_name(self, count=1):
- n = self.group_start
- groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)]
- self.group_start += count
- return groups[0] if count == 1 else groups
-
- def _generate_random_snapshot_name(self, count=1):
- n = self.snapshot_start
- snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)]
- self.snapshot_start += count
- return snaps[0] if count == 1 else snaps
-
- def _generate_random_clone_name(self, count=1):
- n = self.clone_start
- clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)]
- self.clone_start += count
- return clones[0] if count == 1 else clones
+ def _gen_name(self, name, n):
+ names = [f'{name}{random.randrange(0, 9999)}{i}' for i in range(n)]
+ return names[0] if n == 1 else names
+
+ def _gen_vol_name(self, n=1):
+ return self._gen_name('vol', n)
+
+ def _gen_subvol_name(self, n=1):
+ return self._gen_name('subvol', n)
+
+ def _gen_subvol_grp_name(self, n=1):
+ return self._gen_name('subvol_grp', n)
+
+ def _gen_subvol_snap_name(self, n=1):
+ return self._gen_name('subvol_snap', n)
+
+ def _gen_subvol_clone_name(self, n=1):
+ return self._gen_name('subvol_clone', n)
def _enable_multi_fs(self): self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it")
@@ -202,7 +186,7 @@ class TestVolumesHelper(CephFSTestCase): result = json.loads(self._fs_cmd("volume", "ls")) if len(result) == 0: self.vol_created = True
- self.volname = self._generate_random_volume_name()
+ self.volname =
self._gen_vol_name() self._fs_cmd("volume", "create", self.volname) else: self.volname = result[0]['name'] @@ -393,14 +377,16 @@ class TestVolumesHelper(CephFSTestCase): """.format(authid=authid,key=key)) guest_mount.client_id = authid - guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), - keyring_txt, sudo=True) + guest_keyring_path = guest_mount.client_remote.mktemp( + data=keyring_txt) # Add a guest client section to the ceph config file. self.config_set("client.{0}".format(authid), "debug client", 20) self.config_set("client.{0}".format(authid), "debug objecter", 20) self.set_conf("client.{0}".format(authid), "keyring", guest_mount.get_keyring_path()) + return guest_keyring_path + def _auth_metadata_get(self, filedata): """ Return a deserialized JSON object, or None @@ -418,11 +404,6 @@ class TestVolumesHelper(CephFSTestCase): self._enable_multi_fs() self._create_or_reuse_test_volume() self.config_set('mon', 'mon_allow_pool_delete', True) - self.volume_start = random.randint(1, (1<<20)) - self.subvolume_start = random.randint(1, (1<<20)) - self.group_start = random.randint(1, (1<<20)) - self.snapshot_start = random.randint(1, (1<<20)) - self.clone_start = random.randint(1, (1<<20)) def tearDown(self): if self.vol_created: @@ -436,7 +417,7 @@ class TestVolumes(TestVolumesHelper): """ That the volume can be created and then cleans up """ - volname = self._generate_random_volume_name() + volname = self._gen_vol_name() self._fs_cmd("volume", "create", volname) volumels = json.loads(self._fs_cmd("volume", "ls")) @@ -467,7 +448,7 @@ class TestVolumes(TestVolumesHelper): volumes = [volume['name'] for volume in vls] #create new volumes and add it to the existing list of volumes - volumenames = self._generate_random_volume_name(2) + volumenames = self._gen_vol_name(2) for volumename in volumenames: self._fs_cmd("volume", "create", volumename) volumes.extend(volumenames) @@ -562,6 +543,102 @@ class TestVolumes(TestVolumesHelper): self.assertNotIn(pool["name"], pools, "pool {0} exists after volume removal".format(pool["name"])) + def test_volume_info(self): + """ + Tests the 'fs volume info' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._gen_subvol_grp_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertEqual(vol_info["used_size"], 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_pending_subvol_deletions(self): + """ + Tests the pending_subvolume_deletions in 'fs volume info' command + """ + subvolname = self._gen_subvol_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777") + # create 3K zero byte files + self._do_subvolume_io(subvolname, number_of_files=3000, file_size=0) + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + self.assertNotEqual(vol_info['pending_subvolume_deletions'], 0, + "pending_subvolume_deletions should be 1") + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_volume_info_without_subvolumegroup(self): + """ + Tests the 'fs volume info' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume 
metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + def test_volume_info_with_human_readable_flag(self): + """ + Tests the 'fs volume info --human_readable' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._gen_subvol_grp_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent" + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertEqual(int(vol_info["used_size"]), 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_with_human_readable_flag_without_subvolumegroup(self): + """ + Tests the 'fs volume info --human_readable' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + +class TestRenameCmd(TestVolumesHelper): + def test_volume_rename(self): """ That volume, its file system and pools, can be renamed. 
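Renaming is also expected to rename the backing pools to
cephfs.<newvolname>.data and cephfs.<newvolname>.meta, as the
assertions that follow verify.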
@@ -569,7 +646,7 @@ class TestVolumes(TestVolumesHelper): for m in self.mounts: m.umount_wait() oldvolname = self.volname - newvolname = self._generate_random_volume_name() + newvolname = self._gen_vol_name() new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" self._fs_cmd("volume", "rename", oldvolname, newvolname, "--yes-i-really-mean-it") @@ -590,7 +667,7 @@ class TestVolumes(TestVolumesHelper): for m in self.mounts: m.umount_wait() oldvolname = self.volname - newvolname = self._generate_random_volume_name() + newvolname = self._gen_vol_name() new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" self._fs_cmd("volume", "rename", oldvolname, newvolname, "--yes-i-really-mean-it") @@ -608,7 +685,8 @@ class TestVolumes(TestVolumesHelper): """ That renaming volume fails without --yes-i-really-mean-it flag. """ - newvolname = self._generate_random_volume_name() + newvolname = self._gen_vol_name() + try: self._fs_cmd("volume", "rename", self.volname, newvolname) except CommandFailedError as ce: @@ -628,7 +706,7 @@ class TestVolumes(TestVolumesHelper): m.umount_wait() self.fs.add_data_pool('another-data-pool') oldvolname = self.volname - newvolname = self._generate_random_volume_name() + newvolname = self._gen_vol_name() self.fs.get_pool_names(refresh=True) orig_data_pool_names = list(self.fs.data_pools.values()) new_metadata_pool = f"cephfs.{newvolname}.meta" @@ -650,7 +728,7 @@ class TestVolumes(TestVolumesHelper): Tests the 'fs volume info' command """ vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() # create subvolumegroup self._fs_cmd("subvolumegroup", "create", self.volname, group) # get volume metadata @@ -665,7 +743,7 @@ class TestVolumes(TestVolumesHelper): """ Tests the pending_subvolume_deletions in 'fs volume info' command """ - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777") # create 3K zero byte files @@ -700,7 +778,7 @@ class TestVolumes(TestVolumesHelper): Tests the 'fs volume info --human_readable' command """ vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() # create subvolumegroup self._fs_cmd("subvolumegroup", "create", self.volname, group) # get volume metadata @@ -742,7 +820,7 @@ class TestVolumes(TestVolumesHelper): class TestSubvolumeGroups(TestVolumesHelper): """Tests for FS subvolume group operations.""" def test_default_uid_gid_subvolume_group(self): - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() expected_uid = 0 expected_gid = 0 @@ -759,7 +837,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_nonexistent_subvolume_group_create(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() group = "non_existent_group" # try, creating subvolume in a nonexistent group @@ -784,7 +862,7 @@ class TestSubvolumeGroups(TestVolumesHelper): raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail") def test_subvolume_group_create_with_auto_cleanup_on_fail(self): - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() data_pool = "invalid_pool" # create group with 
invalid data pool layout with self.assertRaises(CommandFailedError): @@ -800,7 +878,7 @@ class TestSubvolumeGroups(TestVolumesHelper): raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail") def test_subvolume_group_create_with_desired_data_pool_layout(self): - group1, group2 = self._generate_random_group_name(2) + group1, group2 = self._gen_subvol_grp_name(2) # create group self._fs_cmd("subvolumegroup", "create", self.volname, group1) @@ -828,7 +906,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, group2) def test_subvolume_group_create_with_desired_mode(self): - group1, group2 = self._generate_random_group_name(2) + group1, group2 = self._gen_subvol_grp_name(2) # default mode expected_mode1 = "755" # desired mode @@ -862,7 +940,7 @@ class TestSubvolumeGroups(TestVolumesHelper): gid = 1000 # create subvolume group - subvolgroupname = self._generate_random_group_name() + subvolgroupname = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid)) # make sure it exists @@ -879,7 +957,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname) def test_subvolume_group_create_with_invalid_data_pool_layout(self): - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() data_pool = "invalid_pool" # create group with invalid data pool layout try: @@ -892,7 +970,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_create_with_size(self): # create group with size -- should set quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") # get group metadata @@ -909,7 +987,7 @@ class TestSubvolumeGroups(TestVolumesHelper): "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"] # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # get group metadata @@ -938,7 +1016,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_create_idempotence(self): # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # try creating w/ same subvolume group name -- should be idempotent @@ -949,7 +1027,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_create_idempotence_mode(self): # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # try creating w/ same subvolume group name with mode -- should set mode @@ -969,7 +1047,7 @@ class TestSubvolumeGroups(TestVolumesHelper): desired_gid = 1000 # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # try creating w/ same subvolume group name with uid/gid -- should set uid/gid @@ -988,7 +1066,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_create_idempotence_data_pool(self): # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) group_path = self._get_subvolume_group_path(self.volname, group) @@ -1013,7 +1091,7 @@ class 
TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_create_idempotence_resize(self): # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # try creating w/ same subvolume name with size -- should set quota @@ -1035,7 +1113,7 @@ class TestSubvolumeGroups(TestVolumesHelper): """ osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1044,7 +1122,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1054,7 +1132,7 @@ class TestSubvolumeGroups(TestVolumesHelper): # Create auth_id authid = "client.guest1" - user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + user = json.loads(self.get_ceph_cmd_stdout( "auth", "get-or-create", authid, "mds", "allow rw path=/volumes", "mgr", "allow rw", @@ -1068,11 +1146,12 @@ class TestSubvolumeGroups(TestVolumesHelper): guest_mount.umount_wait() # configure credentials for guest client - self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, "guest1", user[0]["key"]) # mount the subvolume mount_path = os.path.join("/", subvolpath) - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + client_keyring_path=guest_keyring_path) # create 99 files of 1MB guest_mount.run_shell_payload("mkdir -p dir1") @@ -1119,7 +1198,7 @@ class TestSubvolumeGroups(TestVolumesHelper): """ osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1128,7 +1207,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1140,7 +1219,7 @@ class TestSubvolumeGroups(TestVolumesHelper): # Create auth_id authid = "client.guest1" - user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + user = json.loads(self.get_ceph_cmd_stdout( "auth", "get-or-create", authid, "mds", f"allow rw path={mount_path}", "mgr", "allow rw", @@ -1154,10 +1233,11 @@ class TestSubvolumeGroups(TestVolumesHelper): guest_mount.umount_wait() # configure credentials for guest client - self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, "guest1", user[0]["key"]) # mount the subvolume - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + client_keyring_path=guest_keyring_path) # create 99 files of 1MB to exceed quota guest_mount.run_shell_payload("mkdir -p dir1") @@ -1200,7 +1280,7 @@ class TestSubvolumeGroups(TestVolumesHelper): """ osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota - group = 
self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1209,7 +1289,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1243,9 +1323,9 @@ class TestSubvolumeGroups(TestVolumesHelper): """ Tests retained snapshot subvolume removal if its group quota is exceeded """ - group = self._generate_random_group_name() - subvolname = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + group = self._gen_subvol_grp_name() + subvolname = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota @@ -1301,11 +1381,11 @@ class TestSubvolumeGroups(TestVolumesHelper): Tests subvolume removal if its group quota is set. """ # create group with size -- should set quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) # remove subvolume @@ -1324,8 +1404,8 @@ class TestSubvolumeGroups(TestVolumesHelper): """ Tests legacy subvolume removal if its group quota is set. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate an old-fashioned subvolume -- in a custom group createpath1 = os.path.join(".", "volumes", group, subvolume) @@ -1358,8 +1438,8 @@ class TestSubvolumeGroups(TestVolumesHelper): """ Tests v1 subvolume removal if its group quota is set.
""" - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in a custom group self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False) @@ -1386,7 +1466,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group with 1MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) # make sure it exists @@ -1417,7 +1497,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group with 1MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) # make sure it exists @@ -1449,7 +1529,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create group with 20MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1458,7 +1538,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1503,7 +1583,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create group with 20MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1512,7 +1592,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1558,7 +1638,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1567,7 +1647,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1612,7 +1692,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) @@ -1637,7 +1717,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*5 # create group with 5MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1646,7 
+1726,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1695,7 +1775,7 @@ class TestSubvolumeGroups(TestVolumesHelper): subvolumegroups = [] #create subvolumegroups - subvolumegroups = self._generate_random_group_name(3) + subvolumegroups = self._gen_subvol_grp_name(3) for groupname in subvolumegroups: self._fs_cmd("subvolumegroup", "create", self.volname, groupname) @@ -1713,12 +1793,12 @@ class TestSubvolumeGroups(TestVolumesHelper): subvolumegroups = [] #create subvolumegroup - subvolumegroups = self._generate_random_group_name(3) + subvolumegroups = self._gen_subvol_grp_name(3) for groupname in subvolumegroups: self._fs_cmd("subvolumegroup", "create", self.volname, groupname) # create subvolume and remove. This creates '_deleting' directory. - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) self._fs_cmd("subvolume", "rm", self.volname, subvolume) @@ -1731,10 +1811,10 @@ class TestSubvolumeGroups(TestVolumesHelper): # tests the 'fs subvolumegroup ls' command filters internal directories # eg: '_deleting', '_nogroup', '_index', "_legacy" - subvolumegroups = self._generate_random_group_name(3) - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolumegroups = self._gen_subvol_grp_name(3) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() #create subvolumegroups for groupname in subvolumegroups: @@ -1789,7 +1869,7 @@ class TestSubvolumeGroups(TestVolumesHelper): group = "pinme" self._fs_cmd("subvolumegroup", "create", self.volname, group) self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True") - subvolumes = self._generate_random_subvolume_name(50) + subvolumes = self._gen_subvol_name(50) for subvolume in subvolumes: self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) self._wait_distributed_subtrees(2 * 2, status=status, rank="all") @@ -1803,7 +1883,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_rm_force(self): # test removing non-existing subvolume group with --force - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() try: self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force") except CommandFailedError: @@ -1812,7 +1892,7 @@ class TestSubvolumeGroups(TestVolumesHelper): def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self): """Test the presence of any subvolumegroup when only subvolumegroup is present""" - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() # create subvolumegroup self._fs_cmd("subvolumegroup", "create", self.volname, group) ret = self._fs_cmd("subvolumegroup", "exist", self.volname) @@ -1832,8 +1912,8 @@ class TestSubvolumeGroups(TestVolumesHelper): """Test the presence of any subvolume when subvolumegroup and subvolume both are present""" - group = self._generate_random_group_name() - subvolume = self._generate_random_subvolume_name(2) + group = self._gen_subvol_grp_name() + subvolume = self._gen_subvol_name(2) # create subvolumegroup 
self._fs_cmd("subvolumegroup", "create", self.volname, group) # create subvolume in group @@ -1859,7 +1939,7 @@ class TestSubvolumeGroups(TestVolumesHelper): """Test the presence of any subvolume when subvolume is present but no subvolumegroup is present""" - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) ret = self._fs_cmd("subvolumegroup", "exist", self.volname) @@ -1869,11 +1949,37 @@ class TestSubvolumeGroups(TestVolumesHelper): ret = self._fs_cmd("subvolumegroup", "exist", self.volname) self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + def test_subvolume_group_rm_when_its_not_empty(self): + group = self._gen_subvol_grp_name() + subvolume = self._gen_subvol_name() + + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + # try, remove subvolume group + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on deleting " + "subvolumegroup when it is not empty") + else: + self.fail("expected the 'fs subvolumegroup rm' command to fail") + + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + class TestSubvolumes(TestVolumesHelper): """Tests for FS subvolume operations, except snapshot and snapshot clone.""" def test_async_subvolume_rm(self): - subvolumes = self._generate_random_subvolume_name(100) + subvolumes = self._gen_subvol_name(100) # create subvolumes for subvolume in subvolumes: @@ -1892,7 +1998,7 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty(timeout=300) def test_default_uid_gid_subvolume(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() expected_uid = 0 expected_gid = 0 @@ -1926,7 +2032,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_and_rm(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # make sure it exists @@ -1948,8 +2054,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_and_rm_in_group(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -1968,7 +2074,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_idempotence(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # try creating w/ same subvolume name -- should be idempotent @@ -1982,7 +2088,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_idempotence_resize(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # try creating w/ same subvolume name with size -- 
should set quota @@ -2003,7 +2109,7 @@ class TestSubvolumes(TestVolumesHelper): default_mode = "755" # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) subvol_path = self._get_subvolume_path(self.volname, subvolume) @@ -2027,7 +2133,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_idempotence_without_passing_mode(self): # create subvolume desired_mode = "777" - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode) subvol_path = self._get_subvolume_path(self.volname, subvolume) @@ -2056,7 +2162,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated") # get subvolume metadata @@ -2071,7 +2177,7 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_with_auto_cleanup_on_fail(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() data_pool = "invalid_pool" # create subvolume with invalid data pool layout fails with self.assertRaises(CommandFailedError): @@ -2089,8 +2195,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_with_desired_data_pool_layout_in_group(self): - subvol1, subvol2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvol1, subvol2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # create group. this also helps set default pool layout for subvolumes # created within the group. 
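Several cases below compare directory modes against octal strings such as "755" and "777". A self-contained sketch of how such a string can be read back from a path on a mounted client (standard library only; the path argument is a placeholder):

    import os
    import stat

    def mode_string(path):
        # Permission bits of path, formatted like the "755"/"777" strings
        # the tests compare against.
        return format(stat.S_IMODE(os.stat(path).st_mode), 'o')

    print(mode_string("/tmp"))  # e.g. "1777" for a sticky, world-writable dir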
@@ -2126,7 +2232,7 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_with_desired_mode(self): - subvol1 = self._generate_random_subvolume_name() + subvol1 = self._gen_subvol_name() # default mode default_mode = "755" @@ -2156,9 +2262,9 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_with_desired_mode_in_group(self): - subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + subvol1, subvol2, subvol3 = self._gen_subvol_name(3) - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() # default mode expected_mode1 = "755" # desired mode @@ -2202,7 +2308,7 @@ class TestSubvolumes(TestVolumesHelper): gid = 1000 # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid)) # make sure it exists @@ -2222,7 +2328,7 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_with_invalid_data_pool_layout(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() data_pool = "invalid_pool" # create subvolume with invalid data pool layout try: @@ -2237,7 +2343,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_with_invalid_size(self): # create subvolume with an invalid size -1 - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() try: self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") except CommandFailedError as ce: @@ -2254,7 +2360,7 @@ class TestSubvolumes(TestVolumesHelper): permission denied error if option --group=_nogroup is provided. 
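The negative cases in this file share one try/except pattern: run the command, expect CommandFailedError, and compare its exit status against an errno constant. A self-contained sketch of that pattern (the exception class below is a stand-in for teuthology's CommandFailedError, defined here only so the snippet runs):

    import errno

    class CommandFailedError(Exception):
        # Stand-in for teuthology's exception; carries the exit status.
        def __init__(self, exitstatus):
            self.exitstatus = exitstatus

    def assert_fails_with(fn, expected_errno):
        # Run fn and require that it fails with the expected status.
        try:
            fn()
        except CommandFailedError as ce:
            assert ce.exitstatus == expected_errno, ce.exitstatus
        else:
            raise AssertionError("expected the command to fail")

    def create_in_nogroup():
        # Creating a subvolume in the internal "_nogroup" group is rejected.
        raise CommandFailedError(errno.EPERM)

    assert_fails_with(create_in_nogroup, errno.EPERM)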
""" - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() # try to create subvolume providing --group_name=_nogroup option try: @@ -2289,7 +2395,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() osize = self.DEFAULT_FILE_SIZE*1024*1024 self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) @@ -2319,7 +2425,7 @@ class TestSubvolumes(TestVolumesHelper): "type", "uid", "features", "state"] # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # get subvolume metadata @@ -2367,7 +2473,7 @@ class TestSubvolumes(TestVolumesHelper): subvolumes = [] # create subvolumes - subvolumes = self._generate_random_subvolume_name(3) + subvolumes = self._gen_subvol_name(3) for subvolume in subvolumes: self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -2439,7 +2545,7 @@ class TestSubvolumes(TestVolumesHelper): """ ensure a subvolume is marked with the ceph.dir.subvolume xattr """ - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -2475,7 +2581,7 @@ class TestSubvolumes(TestVolumesHelper): self.fs.set_max_mds(2) status = self.fs.wait_for_daemons() - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1") path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) @@ -2493,8 +2599,8 @@ class TestSubvolumes(TestVolumesHelper): ### authorize operations def test_authorize_deauthorize_legacy_subvolume(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() authid = "alice" guest_mount = self.mount_b @@ -2519,10 +2625,11 @@ class TestSubvolumes(TestVolumesHelper): self.assertIn("client.{0}".format(authid), existing_ids) # configure credentials for guest client - self._configure_guest_auth(guest_mount, authid, key) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, authid, key) # mount the subvolume, and write to it - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + client_keyring_path=guest_keyring_path) guest_mount.write_n_mb("data.bin", 1) # authorize guest authID read access to subvolume @@ -2551,8 +2658,8 @@ class TestSubvolumes(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_authorize_deauthorize_subvolume(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() authid = "alice" guest_mount = self.mount_b @@ -2575,10 +2682,11 @@ class TestSubvolumes(TestVolumesHelper): self.assertIn("client.{0}".format(authid), existing_ids) # configure credentials for guest client - self._configure_guest_auth(guest_mount, authid, key) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, authid, key) # mount the subvolume, and write to it - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + 
client_keyring_path=guest_keyring_path) guest_mount.write_n_mb("data.bin", 1) # authorize guest authID read access to subvolume @@ -2614,8 +2722,8 @@ class TestSubvolumes(TestVolumesHelper): subvolumes is stored as a two-way mapping between auth IDs and subvolumes that they're authorized to access. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() guest_mount = self.mount_b @@ -2722,8 +2830,8 @@ class TestSubvolumes(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_subvolume_authorized_list(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() authid1 = "alice" authid2 = "guest1" authid3 = "guest2" @@ -2765,11 +2873,11 @@ class TestSubvolumes(TestVolumesHelper): it's not allowed to authorize the auth-id by default. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # Create auth_id - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( "auth", "get-or-create", "client.guest1", "mds", "allow *", "osd", "allow rw", @@ -2798,7 +2906,7 @@ class TestSubvolumes(TestVolumesHelper): self.fail("expected the 'fs subvolume authorize' command to fail") # clean up - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -2809,11 +2917,11 @@ class TestSubvolumes(TestVolumesHelper): allowed with option allow_existing_id. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # Create auth_id - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( "auth", "get-or-create", "client.guest1", "mds", "allow *", "osd", "allow rw", @@ -2841,7 +2949,7 @@ class TestSubvolumes(TestVolumesHelper): # clean up self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -2852,8 +2960,8 @@ class TestSubvolumes(TestVolumesHelper): deauthorize. It should only remove caps associated with it. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() auth_id = "guest1" guestclient_1 = { @@ -2875,7 +2983,7 @@ class TestSubvolumes(TestVolumesHelper): "--group_name", group).rstrip() # Update caps for guestclient_1 out of band - out = self.fs.mon_manager.raw_cluster_cmd( + out = self.get_ceph_cmd_stdout( "auth", "caps", "client.guest1", "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path), "osd", "allow rw pool=cephfs_data", @@ -2888,7 +2996,7 @@ class TestSubvolumes(TestVolumesHelper): # Validate the caps of guestclient_1 after deauthorize. It should not have deleted # guestclient_1. 
The mgr and mds caps should be present which was updated out of band. - out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty")) + out = json.loads(self.get_ceph_cmd_stdout("auth", "get", "client.guest1", "--format=json-pretty")) self.assertEqual("client.guest1", out[0]["entity"]) self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"]) @@ -2896,7 +3004,7 @@ class TestSubvolumes(TestVolumesHelper): self.assertNotIn("osd", out[0]["caps"]) # clean up - out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + out = self.get_ceph_cmd_stdout("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -2909,8 +3017,8 @@ class TestSubvolumes(TestVolumesHelper): guest_mount = self.mount_b - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() auth_id = "guest1" guestclient_1 = { @@ -2948,7 +3056,7 @@ class TestSubvolumes(TestVolumesHelper): # clean up self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) guest_mount.umount_wait() - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -2961,8 +3069,8 @@ class TestSubvolumes(TestVolumesHelper): guest_mount = self.mount_b - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() guestclient_1 = { "auth_id": "guest1", @@ -3004,7 +3112,7 @@ class TestSubvolumes(TestVolumesHelper): # clean up self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group) guest_mount.umount_wait() - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -3019,8 +3127,8 @@ class TestSubvolumes(TestVolumesHelper): guest_mount = self.mount_b - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() auth_id = "guest1" guestclient_1 = { @@ -3079,7 +3187,7 @@ class TestSubvolumes(TestVolumesHelper): self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) guest_mount.umount_wait() - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -3094,8 +3202,8 @@ class TestSubvolumes(TestVolumesHelper): guest_mount = self.mount_b - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - 
group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() auth_id = "guest1" guestclient_1 = { @@ -3151,7 +3259,7 @@ class TestSubvolumes(TestVolumesHelper): # clean up self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) guest_mount.umount_wait() - self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self.run_ceph_cmd("auth", "rm", "client.guest1") self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) self._fs_cmd("subvolumegroup", "rm", self.volname, group) @@ -3161,8 +3269,8 @@ class TestSubvolumes(TestVolumesHelper): That a subvolume client can be evicted based on the auth ID """ - subvolumes = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolumes = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3189,11 +3297,14 @@ class TestSubvolumes(TestVolumesHelper): mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i], "--group_name", group).rstrip() - # configure credentials for guest client - self._configure_guest_auth(guest_mounts[i], auth_id, key) + # configure credentials for guest client + guest_keyring_path = self._configure_guest_auth(guest_mounts[i], + auth_id, key) # mount the subvolume, and write to it - guest_mounts[i].mount_wait(cephfs_mntpt=mount_path) + guest_mounts[i].mount_wait( + cephfs_mntpt=mount_path, + client_keyring_path=guest_keyring_path) guest_mounts[i].write_n_mb("data.bin", 1) # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted @@ -3230,7 +3341,7 @@ class TestSubvolumes(TestVolumesHelper): self.fs.wait_for_daemons() self.config_set('mds', 'mds_export_ephemeral_random', True) - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01") # no verification @@ -3248,7 +3359,7 @@ class TestSubvolumes(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) # make sure it exists @@ -3281,7 +3392,7 @@ class TestSubvolumes(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) # make sure it exists @@ -3315,7 +3426,7 @@ class TestSubvolumes(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") # make sure it exists @@ -3362,7 +3473,7 @@ class TestSubvolumes(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") # make sure it exists @@ -3410,7 +3521,7 @@ class 
TestSubvolumes(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*10 # create subvolume of quota 10MB and make sure it exists - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") subvolpath = self._get_subvolume_path(self.volname, subvolname) self.assertNotEqual(subvolpath, None) @@ -3458,7 +3569,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(self.DEFAULT_FILE_SIZE*1024*1024)) @@ -3485,7 +3596,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777") @@ -3522,7 +3633,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_rm_force(self): # test removing non-existing subvolume with --force - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() try: self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") except CommandFailedError: @@ -3531,8 +3642,8 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_exists_with_subvolumegroup_and_subvolume(self): """Test the presence of any subvolume by specifying the name of subvolumegroup""" - group = self._generate_random_group_name() - subvolume1 = self._generate_random_subvolume_name() + group = self._gen_subvol_grp_name() + subvolume1 = self._gen_subvol_name() # create subvolumegroup self._fs_cmd("subvolumegroup", "create", self.volname, group) # create subvolume in group @@ -3550,7 +3661,7 @@ class TestSubvolumes(TestVolumesHelper): """Test the presence of any subvolume specifying the name of subvolumegroup and no subvolumes""" - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() # create subvolumegroup self._fs_cmd("subvolumegroup", "create", self.volname, group) ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) @@ -3562,7 +3673,7 @@ class TestSubvolumes(TestVolumesHelper): """Test the presence of any subvolume without specifying the name of subvolumegroup""" - subvolume1 = self._generate_random_subvolume_name() + subvolume1 = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume1) ret = self._fs_cmd("subvolume", "exist", self.volname) @@ -3585,7 +3696,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() osize = self.DEFAULT_FILE_SIZE*1024*1024 self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) @@ -3614,8 +3725,8 @@ class TestSubvolumes(TestVolumesHelper): is cleaned up. The subvolume deletion issued while the trash directory is not empty, should pass and should not error out with EAGAIN. 
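Nearly every test in this section ends by draining the volume's trash (the internal "_deleting" directory) via `_wait_for_trash_empty`. A generic polling loop in the same spirit (a sketch, not the qa library's implementation; `trash_entry_count` is a hypothetical callable):

    import time

    def wait_until(predicate, timeout=60, interval=2):
        # Poll predicate() until it returns True or the timeout elapses.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if predicate():
                return
            time.sleep(interval)
        raise TimeoutError(f"condition not met within {timeout}s")

    # e.g. wait_until(lambda: trash_entry_count() == 0, timeout=300)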
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -3644,8 +3755,8 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_user_metadata_set(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3668,8 +3779,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_set_idempotence(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3698,8 +3809,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3731,8 +3842,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3761,8 +3872,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get_for_nonexisting_section(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3786,8 +3897,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_update(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3823,8 +3934,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3856,8 +3967,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list_if_no_metadata_set(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3885,8 +3996,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3920,8 +4031,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3950,8 +4061,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_for_nonexisting_section(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3975,8 +4086,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_force(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4010,8 +4121,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4051,8 +4162,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate an old-fashioned subvolume in a custom group createpath = os.path.join(".", "volumes", group, subvolname) @@ -4085,8 +4196,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate an old-fashioned subvolume in a custom group createpath = os.path.join(".", "volumes", group, subvolname) @@ -4133,9 +4244,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): """Tests for FS subvolume group snapshot operations.""" @unittest.skip("skipping subvolumegroup snapshot tests") def test_nonexistent_subvolume_group_snapshot_rm(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4169,9 +4280,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_create_and_rm(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4196,9 +4307,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_idempotence(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4231,11 +4342,11 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): snapshots = [] # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # create subvolumegroup snapshots - snapshots = self._generate_random_snapshot_name(3) + snapshots = self._gen_subvol_snap_name(3) for snapshot in snapshots: self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) @@ -4250,8 +4361,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_rm_force(self): # test removing non-existing subvolume group snapshot with --force - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() #
remove snapshot try: self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force") @@ -4259,8 +4370,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed") def test_subvolume_group_snapshot_unsupported_status(self): - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4280,8 +4391,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): class TestSubvolumeSnapshots(TestVolumesHelper): """Tests for FS subvolume snapshot operations.""" def test_nonexistent_subvolume_snapshot_rm(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4308,8 +4419,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_create_and_rm(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4327,8 +4438,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_create_idempotence(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4356,8 +4467,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): snap_md = ["created_at", "data_pool", "has_pending_clones"] - subvolume = self._generate_random_subvolume_name() - snapshot, snap_missing = self._generate_random_snapshot_name(2) + subvolume = self._gen_subvol_name() + snapshot, snap_missing = self._gen_subvol_snap_name(2) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -4391,9 +4502,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_in_group(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4422,11 +4533,11 @@ class TestSubvolumeSnapshots(TestVolumesHelper): snapshots = [] # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # create subvolume snapshots - snapshots = self._generate_random_snapshot_name(3) + snapshots = self._gen_subvol_snap_name(3) for snapshot in snapshots: self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) @@ -4454,8 +4565,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): # at ancestral level snapshots = [] - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume 
= self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
         snap_count = 3

         # create group
@@ -4465,7 +4576,7 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
             self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)

         # create subvolume snapshots
-        snapshots = self._generate_random_snapshot_name(snap_count)
+        snapshots = self._gen_subvol_snap_name(snap_count)
         for snapshot in snapshots:
             self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group)

@@ -4500,8 +4611,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
            at ancestral level
         """
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -4548,8 +4659,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
            at ancestral level
         """
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -4596,9 +4707,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
            fail.
         """
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
-        group_snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
+        group_snapshot = self._gen_subvol_snap_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -4637,8 +4748,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         """
        ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash
         """
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4683,8 +4794,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         """
         snap_md = ["created_at", "data_pool", "has_pending_clones"]

-        subvolume = self._generate_random_subvolume_name()
-        snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+        subvolume = self._gen_subvol_name()
+        snapshot1, snapshot2 = self._gen_subvol_snap_name(2)

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4746,8 +4857,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         """
         snap_md = ["created_at", "data_pool", "has_pending_clones"]

-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4840,7 +4951,7 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         """
         ensure retain snapshots based delete of a subvolume with no snapshots, deletes the subvolume
         """
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4859,8 +4970,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         """
         ensure retained subvolume recreate fails if its trash is not yet purged
         """
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot =
self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4898,8 +5009,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_rm_with_snapshots(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4930,9 +5041,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): Snapshot protect/unprotect commands are deprecated. This test exists to ensure that invoking the command does not cause errors, till they are removed from a subsequent release. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -4970,8 +5081,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): def test_subvolume_snapshot_rm_force(self): # test removing non existing subvolume snapshot with --force - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # remove snapshot try: @@ -4983,9 +5094,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Set custom metadata for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5015,9 +5126,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Set custom metadata for subvolume snapshot (Idempotency). """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5065,9 +5176,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5106,9 +5217,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5144,9 +5255,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5177,9 +5288,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Update custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5222,9 +5333,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ List custom metadata for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5261,9 +5372,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ List custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5295,9 +5406,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5338,9 +5449,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5376,9 +5487,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5409,9 +5520,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Forcefully remove custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5452,9 +5563,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Forcefully remove custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5501,9 +5612,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Verify metadata removal of subvolume snapshot after snapshot removal. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5533,9 +5644,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): # try to get metadata after removing snapshot. # Expecting error ENOENT with error message of snapshot does not exist - cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( - args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], - check_status=False, stdout=StringIO(), stderr=StringIO()) + cmd_ret = self.run_ceph_cmd( + args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], check_status=False, stdout=StringIO(), + stderr=StringIO()) self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error") self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(), f"Expecting message: snapshot '{snapshot}' does not exist ") @@ -5561,9 +5672,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Validate cleaning of stale subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5628,9 +5739,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", "type", "uid"] - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5672,8 +5783,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): If no clone is performed then path /volumes/_index/clone/{track_id} will not exist. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume. self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5700,10 +5811,13 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ Verify subvolume snapshot info output if no clone is in pending state. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # create subvolume. self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5741,8 +5855,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Clones are not specified for particular target_group. Hence target_group should not be in the output as we don't show _nogroup (default group) """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] # create subvolume. @@ -5754,6 +5868,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # insert delay at the beginning of snapshot clone self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # schedule a clones for clone in clone_list: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -5788,11 +5905,11 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Verify subvolume snapshot info output if clones are in pending state. Clones are not specified for target_group. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - group = self._generate_random_group_name() - target_group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + group = self._gen_subvol_grp_name() + target_group = self._gen_subvol_grp_name() # create groups self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5844,8 +5961,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Orphan clones should not list under pending clones. 
orphan_clones_count should display correct count of orphan clones' """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] # create subvolume. @@ -5857,6 +5974,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # insert delay at the beginning of snapshot clone self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # schedule a clones for clone in clone_list: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -5891,7 +6011,7 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self.assertEqual(res['has_pending_clones'], "no") def test_non_clone_status(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -5911,9 +6031,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() osize = self.DEFAULT_FILE_SIZE*1024*1024*12 # create subvolume, in an isolated namespace with a specified size @@ -5955,9 +6075,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_inherit_quota_attrs(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() osize = self.DEFAULT_FILE_SIZE*1024*1024*12 # create subvolume with a specified size @@ -6003,9 +6123,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_getpath(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6052,9 +6172,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_snapshot_rm(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6100,9 +6220,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_source(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = 
self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6151,9 +6271,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ retain snapshots of a cloned subvolume and check disallowed operations """ - subvolume = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6225,9 +6345,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ clone a snapshot from a snapshot retained subvolume """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6270,9 +6390,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ clone a subvolume from recreated subvolume's latest snapshot """ - subvolume = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) - clone = self._generate_random_clone_name(1) + subvolume = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) + clone = self._gen_subvol_clone_name(1) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6328,8 +6448,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ recreate a subvolume from one of its retained snapshots """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6372,9 +6492,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure retained clone recreate fails if its trash is not yet purged """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -6426,9 +6546,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_attr_clone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6462,9 +6582,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is not shown when clone is not in failed/cancelled state """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = 
self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6528,9 +6648,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is in failed state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6573,9 +6693,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is cancelled during pending state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6617,9 +6737,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is cancelled during in-progress state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6661,9 +6781,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6694,9 +6814,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_quota_exceeded(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume with 20MB quota osize = self.DEFAULT_FILE_SIZE*1024*1024*20 @@ -6738,9 +6858,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): 'complete|cancelled|failed' states. It fails with EAGAIN in any other states. 
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6785,9 +6905,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_retain_suid_guid(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6827,9 +6947,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_and_reclone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6880,9 +7000,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_cancel_in_progress(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6931,9 +7051,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # yeh, 1gig -- we need the clone to run for sometime FILE_SIZE_MB = 1024 - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clones = self._generate_random_clone_name(NR_CLONES) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clones = self._gen_subvol_snap_name(NR_CLONES) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6944,6 +7064,11 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # Disable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'false') + # schedule clones for clone in clones: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -6983,10 +7108,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_different_groups(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - s_group, c_group = self._generate_random_group_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + 
s_group, c_group = self._gen_subvol_grp_name(2) # create groups self._fs_cmd("subvolumegroup", "create", self.volname, s_group) @@ -7026,9 +7151,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_fail_with_remove(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) pool_capacity = 32 * 1024 * 1024 # number of files required to fill up 99% of the pool @@ -7047,8 +7172,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): new_pool = "new_pool" self.fs.add_data_pool(new_pool) - self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool, - "max_bytes", "{0}".format(pool_capacity // 4)) + self.run_ceph_cmd("osd", "pool", "set-quota", new_pool, + "max_bytes", f"{pool_capacity // 4}") # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool) @@ -7089,9 +7214,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_on_existing_subvolumes(self): - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolumes self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777") @@ -7141,9 +7266,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_pool_layout(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # add data pool new_pool = "new_pool" @@ -7185,10 +7310,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_under_group(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + group = self._gen_subvol_grp_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -7225,9 +7350,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_with_attrs(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() mode = "777" uid = "1000" @@ -7274,9 +7399,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): and verify clone operation. further ensure that a legacy volume is not updated to v2, but clone is. 
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # emulate a old-fashioned subvolume createpath = os.path.join(".", "volumes", "_nogroup", subvolume) @@ -7367,10 +7492,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self.assertEqual(max_concurrent_clones, 2) def test_subvolume_under_group_snapshot_clone(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -7406,6 +7531,159 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # verify trash dir is clean self._wait_for_trash_empty() + def test_subvolume_snapshot_clone_with_no_wait_enabled(self): + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2, clone3 = self._gen_subvol_clone_name(3) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=10) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + # Enable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', True) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'true') + + # Insert delay of 15 seconds at the beginning of the snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # schedule a clone2 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2) + + # schedule a clone3 + cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( + args=["fs", "subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3], check_status=False, stdout=StringIO(), + stderr=StringIO()) + self.assertEqual(cmd_ret.returncode, errno.EAGAIN, "Expecting EAGAIN error") + + # check clone1 status + self._wait_for_clone_to_complete(clone1) + + # verify clone1 + self._verify_clone(subvolume, snapshot, clone1) + + # check clone2 status + self._wait_for_clone_to_complete(clone2) + + # verify clone2 + self._verify_clone(subvolume, snapshot, clone2) + + # schedule clone3 , it should be successful this time + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3) + + # check clone3 status + self._wait_for_clone_to_complete(clone3) + + # verify clone3 + self._verify_clone(subvolume, snapshot, clone3) + + # set number of cloner threads to default + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 4) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + 
self.assertEqual(max_concurrent_clones, 4) + + # set the snapshot_clone_delay to default + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 0) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + self._fs_cmd("subvolume", "rm", self.volname, clone3) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_no_wait_not_enabled(self): + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2, clone3 = self._gen_subvol_clone_name(3) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=10) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Disable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'false') + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # schedule a clone2 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2) + + # schedule a clone3 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3) + + # check clone1 status + self._wait_for_clone_to_complete(clone1) + + # verify clone1 + self._verify_clone(subvolume, snapshot, clone1) + + # check clone2 status + self._wait_for_clone_to_complete(clone2) + + # verify clone2 + self._verify_clone(subvolume, snapshot, clone2) + + # check clone3 status + self._wait_for_clone_to_complete(clone3) + + # verify clone3 + self._verify_clone(subvolume, snapshot, clone3) + + # set the snapshot_clone_no_wait config option to default + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', True) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'true') + + # set number of cloner threads to default + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 4) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 4) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + self._fs_cmd("subvolume", "rm", self.volname, clone3) + + # verify trash dir is clean + self._wait_for_trash_empty() + class TestMisc(TestVolumesHelper): """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations.""" @@ -7417,7 +7695,7 @@ class TestMisc(TestVolumesHelper): self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted # Get the mgr to definitely mount cephfs - subvolume = 
self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) sessions = self._session_list() self.assertEqual(len(sessions), 1) @@ -7433,7 +7711,7 @@ class TestMisc(TestVolumesHelper): self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted # Get the mgr to definitely mount cephfs - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) sessions = self._session_list() self.assertEqual(len(sessions), 1) @@ -7537,8 +7815,8 @@ class TestMisc(TestVolumesHelper): accessible. further ensure that a legacy volume is not updated to v2. """ - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- one in the default group and # the other in a custom group @@ -7588,9 +7866,9 @@ class TestMisc(TestVolumesHelper): "type", "uid", "features", "state"] snap_md = ["created_at", "data_pool", "has_pending_clones"] - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) mode = "777" uid = "1000" gid = "1000" @@ -7695,8 +7973,8 @@ class TestMisc(TestVolumesHelper): poor man's upgrade test -- theme continues... ensure v1 to v2 upgrades are not done automatically due to various states of v1 """ - subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) - group = self._generate_random_group_name() + subvolume1, subvolume2, subvolume3 = self._gen_subvol_name(3) + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in the default group subvol1_path = self._create_v1_subvolume(subvolume1) @@ -7753,8 +8031,8 @@ class TestMisc(TestVolumesHelper): poor man's upgrade test -- theme continues... ensure v1 to v2 upgrades work """ - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in the default group subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) @@ -7786,7 +8064,7 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... """ - subvol1, subvol2 = self._generate_random_subvolume_name(2) + subvol1, subvol2 = self._gen_subvol_name(2) # emulate a old-fashioned subvolume in the default group createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1) @@ -7822,7 +8100,7 @@ class TestMisc(TestVolumesHelper): self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1) # Validate that the mds path added is of subvol1 and not of subvol2 - out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty")) + out = json.loads(self.get_ceph_cmd_stdout("auth", "get", "client.alice", "--format=json-pretty")) self.assertEqual("client.alice", out[0]["entity"]) self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"]) @@ -7839,8 +8117,8 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... 
""" - subvol = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvol = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- in a custom group createpath = os.path.join(".", "volumes", group, subvol) @@ -7882,8 +8160,8 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... """ - subvol = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvol = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- in a custom group createpath = os.path.join(".", "volumes", group, subvol) @@ -7926,8 +8204,8 @@ class TestPerModuleFinsherThread(TestVolumesHelper): as four subvolume cmds are run """ def test_volumes_module_finisher_thread(self): - subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) - group = self._generate_random_group_name() + subvol1, subvol2, subvol3 = self._gen_subvol_name(3) + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py index cbb344305..7d5233f8f 100644 --- a/qa/tasks/cephfs/xfstests_dev.py +++ b/qa/tasks/cephfs/xfstests_dev.py @@ -143,8 +143,8 @@ class XFSTestsDev(CephFSTestCase): import configparser cp = configparser.ConfigParser() - cp.read_string(self.fs.mon_manager.raw_cluster_cmd( - 'auth', 'get-or-create', 'client.admin')) + cp.read_string(self.get_ceph_cmd_stdout('auth', 'get-or-create', + 'client.admin')) return cp['client.admin']['key'] diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py index b6ffade4c..0b7b7a3b4 100644 --- a/qa/tasks/mgr/dashboard/test_health.py +++ b/qa/tasks/mgr/dashboard/test_health.py @@ -29,6 +29,7 @@ class HealthTest(DashboardTestCase): 'in': JList(int), 'last_failure': int, 'max_file_size': int, + 'max_xattr_size': int, 'explicitly_allowed_features': int, 'damaged': JList(int), 'tableserver': int, @@ -57,7 +58,9 @@ class HealthTest(DashboardTestCase): 'allow_snaps': bool, 'allow_multimds_snaps': bool, 'allow_standby_replay': bool, - 'refuse_client_session': bool + 'refuse_client_session': bool, + 'refuse_standby_for_another_fs': bool, + 'balance_automate': bool, }), 'ever_allowed_features': int, 'root': int diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py index 94a230c8d..aa5bc6e56 100644 --- a/qa/tasks/mgr/mgr_test_case.py +++ b/qa/tasks/mgr/mgr_test_case.py @@ -29,8 +29,11 @@ class MgrCluster(CephCluster): def mgr_stop(self, mgr_id): self.mgr_daemons[mgr_id].stop() - def mgr_fail(self, mgr_id): - self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) + def mgr_fail(self, mgr_id=None): + if mgr_id is None: + self.mon_manager.raw_cluster_cmd("mgr", "fail") + else: + self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) def mgr_restart(self, mgr_id): self.mgr_daemons[mgr_id].restart() @@ -77,6 +80,8 @@ class MgrTestCase(CephTestCase): for daemon in cls.mgr_cluster.mgr_daemons.values(): daemon.stop() + cls.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "set", "down", "false") + for mgr_id in cls.mgr_cluster.mgr_ids: cls.mgr_cluster.mgr_fail(mgr_id) @@ -112,7 +117,11 @@ class MgrTestCase(CephTestCase): raise SkipTest( "Only have {0} manager daemons, {1} are required".format( len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) - + + # We expect laggy OSDs in this testing environment so turn off this 
warning. + # See https://tracker.ceph.com/issues/61907 + cls.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mds', + 'defer_client_eviction_on_laggy_osds', 'false') cls.setup_mgrs() @classmethod diff --git a/qa/tasks/mgr/test_cli.py b/qa/tasks/mgr/test_cli.py new file mode 100644 index 000000000..a43be90ea --- /dev/null +++ b/qa/tasks/mgr/test_cli.py @@ -0,0 +1,32 @@ +import logging + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCLI(MgrTestCase): + MGRS_REQUIRED = 2 + + def setUp(self): + super(TestCLI, self).setUp() + self.setup_mgrs() + + def test_set_down(self): + """ + That setting the down flag prevents a standby from promoting. + """ + + with self.assert_cluster_log("Activating manager daemon", present=False): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() == "", timeout=60) + + def test_set_down_off(self): + """ + That removing the down flag allows a standby to promote. + """ + + with self.assert_cluster_log("Activating manager daemon"): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() == "", timeout=60) + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'false') diff --git a/qa/tasks/mgr/test_devicehealth.py b/qa/tasks/mgr/test_devicehealth.py new file mode 100644 index 000000000..d3aa33fc0 --- /dev/null +++ b/qa/tasks/mgr/test_devicehealth.py @@ -0,0 +1,33 @@ +from io import StringIO +import logging + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestDeviceHealth(MgrTestCase): + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestDeviceHealth, self).setUp() + self.setup_mgrs() + + def tearDown(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mon', 'mon_allow_pool_delete', 'true') + self.mgr_cluster.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', '.mgr', '.mgr', '--yes-i-really-really-mean-it-not-faking') + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'false') + + def test_legacy_upgrade_snap(self): + """ + """ + + o = "ABC_DEADB33F_FA" + self.mon_manager.do_rados(["put", o, "-"], pool=".mgr", stdin=StringIO("junk")) + self.mon_manager.do_rados(["mksnap", "foo"], pool=".mgr") + self.mon_manager.do_rados(["rm", o], pool=".mgr") + self.mgr_cluster.mgr_fail() + + with self.assert_cluster_log("Unhandled exception from module 'devicehealth' while running", present=False): + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() is not None, timeout=60) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py index 780dae1e1..28d58715f 100644 --- a/qa/tasks/radosgw_admin.py +++ b/qa/tasks/radosgw_admin.py @@ -7,8 +7,9 @@ Rgw admin testing against a running instance # grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' # # to run this standalone: -# python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port -# +# 1. uncomment vstart_runner lines to run locally against a vstart cluster +# 2. 
run:
+#    $ python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port

 import json
 import logging
@@ -27,7 +28,7 @@ import httplib2
 #import pdb

-import tasks.vstart_runner
+#import tasks.vstart_runner
 from tasks.rgw import RGWEndpoint
 from tasks.util.rgw import rgwadmin as tasks_util_rgw_rgwadmin
 from tasks.util.rgw import get_user_summary, get_user_successful_ops
@@ -1107,7 +1108,7 @@ def task(ctx, config):
     (err, out) = rgwadmin(ctx, client, ['zonegroup', 'get'], check_status=True)

 from teuthology.config import config
-from teuthology.orchestra import cluster
+from teuthology.orchestra import cluster, remote
 import argparse;

@@ -1124,7 +1125,9 @@ def main():
     else:
         port = 80

-    client0 = tasks.vstart_runner.LocalRemote()
+    client0 = remote.Remote(host)
+    #client0 = tasks.vstart_runner.LocalRemote()
+
     ctx = config
     ctx.cluster=cluster.Cluster(remotes=[(client0, ['ceph.client.rgw.%s' % (port),]),])
diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py
index df4886fb6..3d429c265 100644
--- a/qa/tasks/vstart_runner.py
+++ b/qa/tasks/vstart_runner.py
@@ -777,9 +777,11 @@ class LocalCephManager(CephManager):
         self.rook = False
         self.testdir = None
         self.run_ceph_w_prefix = self.run_cluster_cmd_prefix = [CEPH_CMD]
-        self.CEPH_CMD = [CEPH_CMD]
         self.RADOS_CMD = [RADOS_CMD]

+    def get_ceph_cmd(self, **kwargs):
+        return [CEPH_CMD]
+
     def find_remote(self, daemon_type, daemon_id):
         """
         daemon_type like 'mds', 'osd'
diff --git a/qa/workunits/cephadm/test_cephadm_timeout.py b/qa/workunits/cephadm/test_cephadm_timeout.py
new file mode 100755
index 000000000..67b43a2df
--- /dev/null
+++ b/qa/workunits/cephadm/test_cephadm_timeout.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python3 -s
+
+import time
+import os
+import fcntl
+import subprocess
+import uuid
+import sys
+
+from typing import Optional, Any
+
+LOCK_DIR = '/run/cephadm'
+DATA_DIR = '/var/lib/ceph'
+
+class _Acquire_ReturnProxy(object):
+    def __init__(self, lock: 'FileLock') -> None:
+        self.lock = lock
+        return None
+
+    def __enter__(self) -> 'FileLock':
+        return self.lock
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.lock.release()
+        return None
+
+class FileLock(object):
+    def __init__(self, name: str, timeout: int = -1) -> None:
+        if not os.path.exists(LOCK_DIR):
+            os.mkdir(LOCK_DIR, 0o700)
+        self._lock_file = os.path.join(LOCK_DIR, name + '.lock')
+
+        self._lock_file_fd: Optional[int] = None
+        self.timeout = timeout
+        self._lock_counter = 0
+        return None
+
+    @property
+    def is_locked(self) -> bool:
+        return self._lock_file_fd is not None
+
+    def acquire(self, timeout: Optional[int] = None, poll_interval: float = 0.05) -> _Acquire_ReturnProxy:
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self.timeout
+
+        # Increment the number right at the beginning.
+        # We can still undo it, if something fails.
+        self._lock_counter += 1
+
+        start_time = time.time()
+        try:
+            while True:
+                if not self.is_locked:
+                    self._acquire()
+
+                if self.is_locked:
+                    break
+                elif timeout >= 0 and time.time() - start_time > timeout:
+                    raise Exception(self._lock_file)
+                else:
+                    time.sleep(poll_interval)
+        except Exception:
+            # Something did go wrong, so decrement the counter.
+            self._lock_counter = max(0, self._lock_counter - 1)
+
+            raise
+        return _Acquire_ReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:
+        if self.is_locked:
+            self._lock_counter -= 1
+
+            if self._lock_counter == 0 or force:
+                self._release()
+                self._lock_counter = 0
+
+        return None
+
+    def __enter__(self) -> 'FileLock':
+        self.acquire()
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.release()
+        return None
+
+    def __del__(self) -> None:
+        self.release(force=True)
+        return None
+
+    def _acquire(self) -> None:
+        open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+        fd = os.open(self._lock_file, open_mode)
+
+        try:
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (IOError, OSError):
+            os.close(fd)
+        else:
+            self._lock_file_fd = fd
+        return None
+
+    def _release(self) -> None:
+        fd = self._lock_file_fd
+        self._lock_file_fd = None
+        fcntl.flock(fd, fcntl.LOCK_UN)  # type: ignore
+        os.close(fd)  # type: ignore
+        return None
+
+def _is_fsid(s):
+    try:
+        uuid.UUID(s)
+    except ValueError:
+        return False
+    return True
+
+def find_fsid():
+    if not os.path.exists(DATA_DIR):
+        raise Exception(f'{DATA_DIR} does not exist. Aborting...')
+
+    for d in os.listdir(DATA_DIR):
+        # assume the first thing we find that is an fsid
+        # is what we want. Not expecting multiple clusters
+        # to have been installed here.
+        if _is_fsid(d):
+            return d
+    raise Exception(f'No fsid dir found in {DATA_DIR}. Aborting...')
+
+def main():
+    print('Looking for cluster fsid...')
+    fsid = find_fsid()
+    print(f'Found fsid {fsid}')
+
+    print('Setting cephadm command timeout to 120...')
+    subprocess.run(['cephadm', 'shell', '--', 'ceph', 'config', 'set',
+                    'mgr', 'mgr/cephadm/default_cephadm_command_timeout', '120'],
+                   check=True)
+
+    print('Taking hold of cephadm lock for 300 seconds...')
+    lock = FileLock(fsid, 300)
+    lock.acquire()
+
+    print('Triggering cephadm device refresh...')
+    subprocess.run(['cephadm', 'shell', '--', 'ceph', 'orch', 'device', 'ls', '--refresh'],
+                   check=True)
+
+    print('Sleeping 150 seconds to allow for timeout to occur...')
+    time.sleep(150)
+
+    print('Checking ceph health detail...')
+    # directing stdout to res.stdout via the "capture_output" option
+    # (and same for stderr) seems to have been added in python 3.7.
+    # Using files so this works with 3.6 as well
+    with open('/tmp/ceph-health-detail-stdout', 'w') as f_stdout:
+        with open('/tmp/ceph-health-detail-stderr', 'w') as f_stderr:
+            subprocess.run(['cephadm', 'shell', '--', 'ceph', 'health', 'detail'],
+                           check=True, stdout=f_stdout, stderr=f_stderr)
+
+    res_stdout = open('/tmp/ceph-health-detail-stdout', 'r').read()
+    res_stderr = open('/tmp/ceph-health-detail-stderr', 'r').read()
+    print(f'"cephadm shell -- ceph health detail" stdout:\n{res_stdout}')
+    print(f'"cephadm shell -- ceph health detail" stderr:\n{res_stderr}')
+
+    print('Checking for correct health warning in health detail...')
+    if 'CEPHADM_REFRESH_FAILED' not in res_stdout:
+        raise Exception('No health warning caused by timeout was raised')
+    if 'Command "cephadm ceph-volume -- inventory" timed out' not in res_stdout:
+        raise Exception('Health warnings did not contain message about the timeout')
+
+    print('Health warnings found successfully.
Exiting.') + return 0 + + +if __name__ == '__main__': + if os.getuid() != 0: + print('Trying to run myself with sudo...') + os.execvp('sudo', [sys.executable] + list(sys.argv)) + main() diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh index a11131215..d61e07111 100755 --- a/qa/workunits/fs/full/subvolume_clone.sh +++ b/qa/workunits/fs/full/subvolume_clone.sh @@ -7,8 +7,8 @@ set -ex # Hence the subsequent subvolume commands on the clone fail with # 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback. -# The osd is of the size 1GB. The full-ratios are set so that osd is treated full -# at around 600MB. The subvolume is created and 100MB is written. +# The osd is of the size 2GiB. The full-ratios are set so that osd is treated full +# at around 1.2GB. The subvolume is created and 200MB is written. # The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds, # all the clones reach pending state for sure. Among ten clones, only a few succeed and the rest fail # with ENOSPC. @@ -47,7 +47,7 @@ echo "After ratios are set" df -h ceph osd df -for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done +for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/2MB_file-$i status=progress bs=1M count=2 conv=fdatasync;done # For debugging echo "After subvolumes are written" @@ -60,6 +60,9 @@ ceph fs subvolume snapshot create cephfs sub_0 snap_0 # Set clone snapshot delay ceph config set mgr mgr/volumes/snapshot_clone_delay 15 +# Disable the snapshot_clone_no_wait config option +ceph config set mgr mgr/volumes/snapshot_clone_no_wait false + # Schedule a few clones, some would fail with no space for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh index a464e30f5..2a3bf956d 100755 --- a/qa/workunits/fs/full/subvolume_rm.sh +++ b/qa/workunits/fs/full/subvolume_rm.sh @@ -2,8 +2,8 @@ set -ex # This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command -# when the osd is full. The command used to hang. The osd is of the size 1GB. -# The subvolume is created and 500MB file is written. The full-ratios are +# when the osd is full. The command used to hang. The osd is of the size 2GiB. +# The subvolume is created and 1GB file is written. The full-ratios are # set below 500MB such that the osd is treated as full. Now the subvolume # is removed. This should be successful with the introduction of FULL # capabilities which the mgr holds. @@ -21,7 +21,7 @@ echo "Before write" df -h ceph osd df -sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500 +sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1GB_file-1 status=progress bs=1M count=1000 ceph osd set-full-ratio 0.2 ceph osd set-nearfull-ratio 0.16 diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh index f6d0add9f..8df89d3c7 100755 --- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -7,8 +7,8 @@ set -ex # snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' # traceback. -# The osd is of the size 1GB. The subvolume is created and 800MB file is written. 
-# Then full-ratios are set below 500MB such that the osd is treated as full. +# The osd is of the size 2GiB. The subvolume is created and 1.6GB file is written. +# Then full-ratios are set below 1GiB such that the osd is treated as full. # The subvolume snapshot is taken which succeeds as no extra space is required # for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it # fails to remove the snapshot metadata set. The snapshot removal fails @@ -31,8 +31,8 @@ echo "Before write" df $CEPH_MNT ceph osd df -# Write 800MB file and set full ratio to around 200MB -ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync +# Write 1.6GB file and set full ratio to around 400MB +ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1.6GB_file-1 status=progress bs=1M count=1600 conv=fdatasync ceph osd set-full-ratio 0.2 ceph osd set-nearfull-ratio 0.16 diff --git a/qa/workunits/fs/quota/quota.sh b/qa/workunits/fs/quota/quota.sh index 1315be6d8..a2f5c459d 100755 --- a/qa/workunits/fs/quota/quota.sh +++ b/qa/workunits/fs/quota/quota.sh @@ -29,7 +29,7 @@ mkdir quota-test cd quota-test # bytes -setfattr . -n ceph.quota.max_bytes -v 100000000 # 100m +setfattr . -n ceph.quota.max_bytes -v 100M expect_false write_file big 1000 # 1g expect_false write_file second 10 setfattr . -n ceph.quota.max_bytes -v 0 @@ -57,7 +57,7 @@ rm -rf * # mix mkdir bytes bytes/files -setfattr bytes -n ceph.quota.max_bytes -v 10000000 #10m +setfattr bytes -n ceph.quota.max_bytes -v 10M setfattr bytes/files -n ceph.quota.max_files -v 5 dd if=/dev/zero of=bytes/files/1 bs=1M count=4 dd if=/dev/zero of=bytes/files/2 bs=1M count=4 @@ -78,7 +78,7 @@ rm -rf * #mv mkdir files limit truncate files/file -s 10G -setfattr limit -n ceph.quota.max_bytes -v 1000000 #1m +setfattr limit -n ceph.quota.max_bytes -v 1M expect_false mv files limit/ @@ -88,8 +88,8 @@ rm -rf * #limit by ancestor mkdir -p ancestor/p1/p2/parent/p3 -setfattr ancestor -n ceph.quota.max_bytes -v 1000000 -setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1000000000 #1g +setfattr ancestor -n ceph.quota.max_bytes -v 1M +setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1G expect_false write_file ancestor/p1/p2/parent/p3/file1 900 #900m stat --printf="%n %s\n" ancestor/p1/p2/parent/p3/file1 @@ -104,6 +104,14 @@ expect_false setfattr -n ceph.quota.max_bytes -v -1 . expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775808 . expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775809 . +setfattr -n ceph.quota.max_bytes -v 0 . +setfattr -n ceph.quota.max_bytes -v 1Ti . +setfattr -n ceph.quota.max_bytes -v 8388607Ti . +expect_false setfattr -n ceph.quota.max_bytes -v 8388608Ti . +expect_false setfattr -n ceph.quota.max_bytes -v -1Ti . +expect_false setfattr -n ceph.quota.max_bytes -v -8388609Ti . +expect_false setfattr -n ceph.quota.max_bytes -v -8388610Ti . + setfattr -n ceph.quota.max_files -v 0 . setfattr -n ceph.quota.max_files -v 1 . setfattr -n ceph.quota.max_files -v 9223372036854775807 . 
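The quota.sh hunk above exercises the human-readable size suffixes now accepted for the quota vxattrs and probes their bounds: 8388607Ti is the largest accepted value because 8388608Ti is 2^23 x 2^40 = 2^63 bytes, one past the maximum of the signed 64-bit quota field. A minimal sketch of the syntax against an existing CephFS mount (the /mnt/cephfs path and quota-demo directory are illustrative assumptions, not part of this changeset):

    cd /mnt/cephfs
    mkdir quota-demo
    # human-readable suffix; ceph parses 100M as 100MiB (104857600 bytes)
    setfattr quota-demo -n ceph.quota.max_bytes -v 100M
    # the stored limit is reported back in plain bytes
    getfattr -n ceph.quota.max_bytes quota-demo
    # a value of 0 removes the quota
    setfattr quota-demo -n ceph.quota.max_bytes -v 0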
diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh index 9b60f065c..602ce04a7 100755 --- a/qa/workunits/kernel_untar_build.sh +++ b/qa/workunits/kernel_untar_build.sh @@ -2,11 +2,11 @@ set -e -wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz +wget -O linux.tar.xz http://download.ceph.com/qa/linux-6.5.11.tar.xz mkdir t cd t -tar xzf ../linux.tar.gz +tar xJf ../linux.tar.xz cd linux* make defconfig make -j`grep -c processor /proc/cpuinfo` diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh index 1b00201ae..10cbe5630 100755 --- a/qa/workunits/mon/config.sh +++ b/qa/workunits/mon/config.sh @@ -98,11 +98,11 @@ ceph tell osd.0 config unset debug_asok ceph tell osd.0 config unset debug_asok ceph config rm osd.0 debug_asok -while ceph config show osd.0 | grep debug_asok | grep mon +while ceph config show osd.0 | grep '^debug_asok[[:space:]]' | grep mon do sleep 1 done -ceph config show osd.0 | grep -c debug_asok | grep 0 +ceph config show osd.0 | grep -c '^debug_asok[[:space:]]' | grep 0 ceph config set osd.0 osd_scrub_cost 123 while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon @@ -111,6 +111,13 @@ do done ceph config rm osd.0 osd_scrub_cost +# RGW daemons test config set +ceph config set client.rgw debug_rgw 22 +while ! ceph config show client.rgw | grep debug_rgw | grep 22 | grep mon +do + sleep 1 +done + # show-with-defaults ceph config show-with-defaults osd.0 | grep debug_asok @@ -130,6 +137,21 @@ rm -f $t1 $t2 expect_false ceph config reset expect_false ceph config reset -1 + + +# test parallel config set +# reproducer for https://tracker.ceph.com/issues/62832 +ceph config reset 0 +for ((try = 0; try < 10; try++)); do + set +x + for ((i = 0; i < 100; i++)); do + # Use a config that will get "handled" by the Objecter instantiated by the ceph binary + ceph config set client rados_mon_op_timeout $((i+300)) & + done 2> /dev/null + set -x + wait +done + # we are at end of testing, so it's okay to revert everything ceph config reset 0 diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh index eb88565ea..0e5b16b7b 100755 --- a/qa/workunits/mon/rbd_snaps_ops.sh +++ b/qa/workunits/mon/rbd_snaps_ops.sh @@ -36,6 +36,7 @@ expect 'rbd --pool=test snap ls image' 0 expect 'rbd --pool=test snap rm image@snapshot' 0 expect 'ceph osd pool mksnap test snapshot' 22 +expect 'rados -p test mksnap snapshot' 1 expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 @@ -52,6 +53,8 @@ expect 'rbd --pool test-foo snap create image@snapshot' 0 ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true expect 'ceph osd pool create test-bar 8' 0 expect 'ceph osd pool application enable test-bar rbd' +# "rados cppool" without --yes-i-really-mean-it should fail +expect 'rados cppool test-foo test-bar' 1 expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0 expect 'rbd --pool test-bar snap rm image@snapshot' 95 expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0 diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh index 57279d26d..15c47074d 100755 --- a/qa/workunits/rbd/cli_generic.sh +++ b/qa/workunits/rbd/cli_generic.sh @@ -432,6 +432,7 @@ test_trash() { rbd trash mv test2 ID=`rbd trash ls | cut -d ' ' -f 1` rbd info --image-id $ID | grep "rbd image 'test2'" + rbd children --image-id $ID | wc -l | grep 0 rbd trash restore $ID rbd ls | grep test2 @@ -449,6 +450,7 @@ test_trash() { rbd create $RBD_CREATE_ARGS -s 1 
test1 rbd snap create test1@snap1 rbd snap protect test1@snap1 + rbd clone test1@snap1 clone rbd trash mv test1 rbd trash ls | grep test1 @@ -459,7 +461,10 @@ test_trash() { ID=`rbd trash ls | cut -d ' ' -f 1` rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1 rbd snap ls --image-id $ID | grep '.*snap1.*' + rbd children --image-id $ID | wc -l | grep 1 + rbd children --image-id $ID | grep 'clone' + rbd rm clone rbd snap unprotect --image-id $ID --snap snap1 rbd snap rm --image-id $ID --snap snap1 rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 @@ -1261,7 +1266,6 @@ test_trash_purge_schedule_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR # Check that you can add a trash purge schedule after a few retries expect_fail rbd trash purge schedule add -p rbd3 10m @@ -1420,7 +1424,6 @@ test_mirror_snapshot_schedule_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR # Check that you can add a mirror snapshot schedule after a few retries expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m @@ -1529,7 +1532,6 @@ test_perf_image_iostat_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR expect_fail rbd perf image iostat --format json rbd3/ns sleep 10 @@ -1661,7 +1663,6 @@ test_tasks_recovery() { jq 'select(.name == "rbd_support")' | jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') ceph osd blocklist add $CLIENT_ADDR - ceph osd blocklist ls | grep $CLIENT_ADDR expect_fail ceph rbd task add flatten rbd2/clone1 sleep 10 diff --git a/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh new file mode 100755 index 000000000..78a390230 --- /dev/null +++ b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +set -ex + +IMAGE=image-alternate-primary +MIRROR_IMAGE_MODE=snapshot +MIRROR_POOL_MODE=image +MOUNT=test-alternate-primary +RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff' +RBD_MIRROR_INSTANCES=1 +RBD_MIRROR_MODE=snapshot +RBD_MIRROR_USE_EXISTING_CLUSTER=1 + +. $(dirname $0)/rbd_mirror_helpers.sh + +take_mirror_snapshots() { + local cluster=$1 + local pool=$2 + local image=$3 + + for i in {1..30}; do + mirror_image_snapshot $cluster $pool $image + sleep 3 + done +} + +slow_untar_workload() { + local mountpt=$1 + + cp linux-5.4.tar.gz $mountpt + # run workload that updates the data and metadata of multiple files on disk. + # rate limit the workload such that the mirror snapshots can be taken as the + # contents of the image are progressively changed by the workload. + local ret=0 + timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \ + | pv -L 256K | tar xf - -C $mountpt" || ret=$? 
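+ # GNU timeout(1) exits with status 124 when it kills a command that is still + # running, so any other status means the rate-limited untar finished early + # instead of spanning the full window in which mirror snapshots are taken.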
+ if ((ret != 124)); then + echo "Workload completed prematurely" + return 1 + fi +} + +setup + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +# initial setup +create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMAGE} \ + ${RBD_MIRROR_MODE} 10G + +if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \ + -o try-netlink ${POOL}/${IMAGE}) +elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \ + ${POOL}/${IMAGE}) +else + echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" + exit 1 +fi +sudo mkfs.ext4 ${DEV} +mkdir ${MOUNT} + +wget https://download.ceph.com/qa/linux-5.4.tar.gz + +for i in {1..25}; do + # create mirror snapshots every few seconds under I/O + sudo mount ${DEV} ${MOUNT} + sudo chown $(whoami) ${MOUNT} + rm -rf ${MOUNT}/* + take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMAGE} & + SNAP_PID=$! + slow_untar_workload ${MOUNT} + wait $SNAP_PID + sudo umount ${MOUNT} + + # calculate hash before demotion of primary image + DEMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} ${DEV} + + demote_image ${CLUSTER1} ${POOL} ${IMAGE} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMAGE} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${IMAGE} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${IMAGE} + + # calculate hash after promotion of secondary image + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \ + -o try-netlink ${POOL}/${IMAGE}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${IMAGE}) + fi + PROMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}') + + if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then + echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}" + exit 1 + fi + + TEMP=${CLUSTER1} + CLUSTER1=${CLUSTER2} + CLUSTER2=${TEMP} +done + +echo OK diff --git a/qa/workunits/rbd/compare_mirror_images.sh b/qa/workunits/rbd/compare_mirror_images.sh new file mode 100755 index 000000000..cbaa77a71 --- /dev/null +++ b/qa/workunits/rbd/compare_mirror_images.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +set -ex + +IMG_PREFIX=image-primary +MIRROR_IMAGE_MODE=snapshot +MIRROR_POOL_MODE=image +MNTPT_PREFIX=test-primary +RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff' +RBD_MIRROR_INSTANCES=1 +RBD_MIRROR_MODE=snapshot +RBD_MIRROR_USE_EXISTING_CLUSTER=1 + +. $(dirname $0)/rbd_mirror_helpers.sh + +take_mirror_snapshots() { + local cluster=$1 + local pool=$2 + local image=$3 + + for i in {1..30}; do + mirror_image_snapshot $cluster $pool $image + sleep 3 + done +} + +slow_untar_workload() { + local mountpt=$1 + + cp linux-5.4.tar.gz $mountpt + # run workload that updates the data and metadata of multiple files on disk. + # rate limit the workload such that the mirror snapshots can be taken as the + # contents of the image are progressively changed by the workload. + local ret=0 + timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \ + | pv -L 256K | tar xf - -C $mountpt" || ret=$? + if ((ret != 124)); then + echo "Workload completed prematurely" + return 1 + fi +} + +wait_for_image_removal() { + local cluster=$1 + local pool=$2 + local image=$3 + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + if ! 
rbd --cluster $cluster ls $pool | grep -wq $image; then + return 0 + fi + sleep $s + done + + echo "image ${pool}/${image} not removed from cluster ${cluster}" + return 1 +} + +compare_demoted_promoted_image() { + local dev=${DEVS[$1-1]} + local img=${IMG_PREFIX}$1 + local mntpt=${MNTPT_PREFIX}$1 + local demote_md5 promote_md5 + + sudo umount ${mntpt} + + # calculate hash before demotion of primary image + demote_md5=$(sudo md5sum ${dev} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} \ + ${POOL}/${img} + + demote_image ${CLUSTER1} ${POOL} ${img} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${img} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${img} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${img} + + # calculate hash after promotion of secondary image + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + dev=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \ + -o try-netlink ${POOL}/${img}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + dev=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${img}) + fi + promote_md5=$(sudo md5sum ${dev} | awk '{print $1}') + sudo rbd --cluster ${CLUSTER2} device unmap -t ${RBD_DEVICE_TYPE} ${dev} + + if [[ "${demote_md5}" != "${promote_md5}" ]]; then + echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}" + return 1 + fi +} + +setup + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +wget https://download.ceph.com/qa/linux-5.4.tar.gz + +for i in {1..10}; do + DEVS=() + SNAP_PIDS=() + COMPARE_PIDS=() + WORKLOAD_PIDS=() + RET=0 + for j in {1..10}; do + IMG=${IMG_PREFIX}${j} + MNTPT=${MNTPT_PREFIX}${j} + create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMG} \ + ${RBD_MIRROR_MODE} 10G + if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \ + -o try-netlink ${POOL}/${IMG}) + elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then + DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \ + ${POOL}/${IMG}) + else + echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}" + exit 1 + fi + DEVS+=($DEV) + sudo mkfs.ext4 ${DEV} + mkdir ${MNTPT} + sudo mount ${DEV} ${MNTPT} + sudo chown $(whoami) ${MNTPT} + # create mirror snapshots under I/O every few seconds + take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMG} & + SNAP_PIDS+=($!) + slow_untar_workload ${MNTPT} & + WORKLOAD_PIDS+=($!) + done + for pid in ${SNAP_PIDS[@]}; do + wait $pid || RET=$? + done + if ((RET != 0)); then + echo "take_mirror_snapshots failed" + exit 1 + fi + for pid in ${WORKLOAD_PIDS[@]}; do + wait $pid || RET=$? + done + if ((RET != 0)); then + echo "slow_untar_workload failed" + exit 1 + fi + + for j in {1..10}; do + compare_demoted_promoted_image $j & + COMPARE_PIDS+=($!) + done + for pid in ${COMPARE_PIDS[@]}; do + wait $pid || RET=$? 
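+ # keep waiting on the remaining comparison jobs even if one fails; RET + # records the last nonzero exit status for the check below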
+ done + if ((RET != 0)); then + echo "compare_demoted_promoted_image failed" + exit 1 + fi + + for j in {1..10}; do + IMG=${IMG_PREFIX}${j} + # Allow for removal of non-primary image by checking that mirroring + # image status is "up+replaying" + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMG} 'up+replaying' + remove_image ${CLUSTER2} ${POOL} ${IMG} + wait_for_image_removal ${CLUSTER1} ${POOL} ${IMG} + rm -rf ${MNTPT_PREFIX}${j} + done +done + +echo OK diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh index bc89e9be5..8e1b05b3f 100755 --- a/qa/workunits/rbd/rbd-nbd.sh +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -202,8 +202,11 @@ provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | used=`rbd -p ${POOL} --format xml du ${IMAGE} | $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` [ "${used}" -lt "${provisioned}" ] +unmap_device ${DEV} ${PID} # resize test +DEV=`_sudo rbd device -t nbd -o try-netlink map ${POOL}/${IMAGE}` +get_pid ${POOL} devname=$(basename ${DEV}) blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) test -n "${blocks}" @@ -216,9 +219,9 @@ rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) test -n "${blocks2}" test ${blocks2} -eq ${blocks} +unmap_device ${DEV} ${PID} # read-only option test -unmap_device ${DEV} ${PID} DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}` PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \ '$2 == pool && $3 == img && $5 == dev {print $1}') diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh index 6ef06f2b8..f4c1070bc 100755 --- a/qa/workunits/rbd/rbd_mirror_bootstrap.sh +++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -1,8 +1,10 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_bootstrap.sh - test peer bootstrap create/import # +set -ex + RBD_MIRROR_MANUAL_PEERS=1 RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1} . $(dirname $0)/rbd_mirror_helpers.sh diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh index 0ba3c97d7..79c36546d 100755 --- a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh +++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh @@ -1,10 +1,12 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload # # The script is used to compare FSX-generated images between two clusters. # +set -ex + . $(dirname $0)/rbd_mirror_helpers.sh trap 'cleanup $?' INT TERM EXIT diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh index d988987ba..6daadbbb4 100755 --- a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh +++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh @@ -1,10 +1,12 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload # # The script is used to compare FSX-generated images between two clusters. # +set -ex + . $(dirname $0)/rbd_mirror_helpers.sh setup diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh index 37739a83d..1e43712a6 100755 --- a/qa/workunits/rbd/rbd_mirror_ha.sh +++ b/qa/workunits/rbd/rbd_mirror_ha.sh @@ -1,8 +1,10 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode # +set -ex + RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7} . 
$(dirname $0)/rbd_mirror_helpers.sh diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh index f4961b925..b6abff96d 100755 --- a/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env bash # # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions # @@ -814,23 +814,23 @@ test_status_in_pool_dir() local description_pattern="$5" local service_pattern="$6" - local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status) - CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} | - tee ${status_log} >&2 - grep "^ state: .*${state_pattern}" ${status_log} || return 1 - grep "^ description: .*${description_pattern}" ${status_log} || return 1 + local status + status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror image status \ + ${pool}/${image}) + grep "^ state: .*${state_pattern}" <<< "$status" || return 1 + grep "^ description: .*${description_pattern}" <<< "$status" || return 1 if [ -n "${service_pattern}" ]; then - grep "service: *${service_pattern}" ${status_log} || return 1 + grep "service: *${service_pattern}" <<< "$status" || return 1 elif echo ${state_pattern} | grep '^up+'; then - grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1 + grep "service: *${MIRROR_USER_ID_PREFIX}.* on " <<< "$status" || return 1 else - grep "service: " ${status_log} && return 1 + grep "service: " <<< "$status" && return 1 fi # recheck using `mirror pool status` command to stress test it. - - local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})" + local last_update + last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' <<< "$status")" test_mirror_pool_status_verbose \ ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" && return 0 @@ -847,16 +847,15 @@ test_mirror_pool_status_verbose() local state_pattern="$4" local prev_last_update="$5" - local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status) - - rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \ - > ${status_log} + local status + status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror pool status ${pool} \ + --verbose --format xml) local last_update state last_update=$($XMLSTARLET sel -t -v \ - "//images/image[name='${image}']/last_update" < ${status_log}) + "//images/image[name='${image}']/last_update" <<< "$status") state=$($XMLSTARLET sel -t -v \ - "//images/image[name='${image}']/state" < ${status_log}) + "//images/image[name='${image}']/state" <<< "$status") echo "${state}" | grep "${state_pattern}" || test "${last_update}" '>' "${prev_last_update}" diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh index 54f6aeec8..20a3b87db 100755 --- a/qa/workunits/rbd/rbd_mirror_journal.sh +++ b/qa/workunits/rbd/rbd_mirror_journal.sh @@ -1,4 +1,4 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode # @@ -7,6 +7,8 @@ # socket, temporary files, and launches rbd-mirror daemon. # +set -ex + . 
$(dirname $0)/rbd_mirror_helpers.sh setup diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh index c70d48b09..17164c4d5 100755 --- a/qa/workunits/rbd/rbd_mirror_snapshot.sh +++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh @@ -1,4 +1,4 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode # @@ -7,6 +7,8 @@ # socket, temporary files, and launches rbd-mirror daemon. # +set -ex + MIRROR_POOL_MODE=image MIRROR_IMAGE_MODE=snapshot diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh index cb79aba7e..ea39d3aae 100755 --- a/qa/workunits/rbd/rbd_mirror_stress.sh +++ b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -1,4 +1,4 @@ -#!/bin/sh -ex +#!/usr/bin/env bash # # rbd_mirror_stress.sh - stress test rbd-mirror daemon # @@ -8,6 +8,8 @@ # tool during the many image test # +set -ex + IMAGE_COUNT=50 export LOCKDEP=0
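A note on the recurring shebang hunks: the helpers refactor above introduces bash-only constructs (<<< herestrings), so the mirror scripts switch from /bin/sh to bash explicitly. Moving the -ex flags off the shebang line into an explicit set -ex also keeps exit-on-error and tracing active when a script is invoked as "bash script.sh" or sourced, where interpreter flags on the shebang never take effect (rbd_mirror_helpers.sh itself is only ever pulled in with the dot operator). A minimal sketch of the converted header, using a hypothetical file name rather than one from this changeset:

    #!/usr/bin/env bash
    #
    # example_workunit.sh - illustrative header only
    #
    set -ex

    RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1}
    . $(dirname $0)/rbd_mirror_helpers.sh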